Code
library(tidyverse)
library(tidymodels)
library(discrim)
library(rpart)
library(rpart.plot)
library(baguette)
library(tidyclust)
library(caret)
library(RColorBrewer)
library(Polychrome)
library(forcats)library(tidyverse)
library(tidymodels)
library(discrim)
library(rpart)
library(rpart.plot)
library(baguette)
library(tidyclust)
library(caret)
library(RColorBrewer)
library(Polychrome)
library(forcats)sample_grid <- matrix(c("Bear", "Bee", "Meadow", "Bear", "Meadow", "Meadow", "Bee", "Meadow", "Bee"),3,3,byrow=TRUE)
sample_grid [,1] [,2] [,3]
[1,] "Bear" "Bee" "Meadow"
[2,] "Bear" "Meadow" "Meadow"
[3,] "Bee" "Meadow" "Bee"
sample_grid2 <- matrix(c("Meadow", "Meadow", "Bee", "Meadow", "Bee", "Meadow", "Bee", "Meadow", "Meadow"),3,3,byrow=TRUE)
sample_grid2 [,1] [,2] [,3]
[1,] "Meadow" "Meadow" "Bee"
[2,] "Meadow" "Bee" "Meadow"
[3,] "Bee" "Meadow" "Meadow"
big_grid1 <- matrix(c("Deer", "Meadow", "Bee", "Bear", "Fox", "Wolf", "Meadow", "Meadow", "Trout", "Stream",
"Deer", "Eagle", "Meadow", "Trout", "Stream", "Fox", "Rabbit", "Stream", "Dragonfly",
"Stream"),4,5,byrow=TRUE)
big_grid1 [,1] [,2] [,3] [,4] [,5]
[1,] "Deer" "Meadow" "Bee" "Bear" "Fox"
[2,] "Wolf" "Meadow" "Meadow" "Trout" "Stream"
[3,] "Deer" "Eagle" "Meadow" "Trout" "Stream"
[4,] "Fox" "Rabbit" "Stream" "Dragonfly" "Stream"
twenty_seven_x <- rep("x", 27)
super_vec <- append(twenty_seven_x,
c("x", "x",
"Deer", "Meadow", "Bee", "Bear", "Fox",
"x", "x",
"x", "x",
"Wolf", "Meadow", "Meadow", "Trout", "Stream",
"x", "x",
"x", "x",
"Deer", "Eagle", "Meadow", "Trout", "Stream",
"x", "x",
"x", "x",
"Fox", "Rabbit", "Stream", "Dragonfly","Stream",
"x", "x"))
super_grid1 <- matrix(super_vec,7,9,byrow=TRUE)
super_grid1 [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
[1,] "x" "x" "x" "x" "x" "x" "x" "x" "x"
[2,] "x" "x" "x" "x" "x" "x" "x" "x" "x"
[3,] "x" "x" "x" "x" "x" "x" "x" "x" "x"
[4,] "x" "x" "Deer" "Meadow" "Bee" "Bear" "Fox" "x" "x"
[5,] "x" "x" "Wolf" "Meadow" "Meadow" "Trout" "Stream" "x" "x"
[6,] "x" "x" "Deer" "Eagle" "Meadow" "Trout" "Stream" "x" "x"
[7,] "x" "x" "Fox" "Rabbit" "Stream" "Dragonfly" "Stream" "x" "x"
cards <- c(rep("Bear", 12),
rep("Bee", 8),
rep("Meadow", 20),
rep("Trout", 10),
rep("Eagle", 8),
rep("Rabbit", 8),
rep("Dragonfly", 8),
rep("Fox", 12),
rep("Deer", 12),
rep("Stream", 20),
rep("Wolf", 12)
)
generate_grid <- function(pool, partial_grid = NULL){
if(is.null(partial_grid) == FALSE){
blanks <- which(partial_grid == "x", TRUE)
n = 20 - nrow(blanks)
}else{
n = 20
}
sample <- sample(pool, n)
if(is.null(partial_grid) == FALSE){
board <- partial_grid
for (i in 1:nrow(blanks)){
loc <- c(blanks[[i, 1]], blanks[[i, 2]])
board[blanks[[i, 1]], blanks[[i, 2]]] = sample[i]
}
}else{
board <- matrix(sample, nrow=4, ncol=5, byrow=TRUE)
}
return(board)
}
generate_grid(cards) [,1] [,2] [,3] [,4] [,5]
[1,] "Eagle" "Stream" "Meadow" "Trout" "Dragonfly"
[2,] "Trout" "Trout" "Eagle" "Meadow" "Bear"
[3,] "Stream" "Wolf" "Meadow" "Stream" "Trout"
[4,] "Meadow" "Fox" "Fox" "Meadow" "Deer"
find_cardinals <- function(i, j, grid){
cardinals <- list()
maxrow = nrow(grid)
maxcol = ncol(grid)
if(i+1 <= maxrow){
cardinals[[length(cardinals)+1]] <- c(i+1,j)
}
if(i-1 > 0){
cardinals[[length(cardinals)+1]] <- c(i-1,j)
}
if(j+1 <= maxcol){
cardinals[[length(cardinals)+1]] <- c(i,j+1)
}
if(j-1 > 0){
cardinals[[length(cardinals)+1]] <- c(i,j-1)
}
return(cardinals)
}find_two_spaces <- function(i, j, grid){
two_space <- list()
maxrow = nrow(grid)
maxcol = ncol(grid)
if(i+1 <= maxrow){
two_space[[length(two_space)+1]] <- c(i+1,j)
}
if(i+2 <= maxrow){
two_space[[length(two_space)+1]] <- c(i+2,j)
}
if(i-1 > 0){
two_space[[length(two_space)+1]] <- c(i-1,j)
}
if(i-2 > 0){
two_space[[length(two_space)+1]] <- c(i-2,j)
}
if(j+1 <= maxcol){
two_space[[length(two_space)+1]] <- c(i,j+1)
}
if(j+2 <= maxcol){
two_space[[length(two_space)+1]] <- c(i,j+2)
}
if(j-1 > 0){
two_space[[length(two_space)+1]] <- c(i,j-1)
}
if(j-2 > 0){
two_space[[length(two_space)+1]] <- c(i,j-2)
}
if(i+1 <= maxrow && j+1 <= maxcol){
two_space[[length(two_space)+1]] <- c(i+1,j+1)
}
if(i+1 <= maxrow && j-1 > 0){
two_space[[length(two_space)+1]] <- c(i+1,j-1)
}
if(i-1 > 0 && j+1 <= maxcol){
two_space[[length(two_space)+1]] <- c(i-1,j+1)
}
if(i-1 > 0 && j-1 > 0){
two_space[[length(two_space)+1]] <- c(i-1,j-1)
}
return(two_space)
}find_more_meadows <- function(i, j, grid, meadow_list){
meadow_list[[length(meadow_list)+1]] = as.double(c(i,j))
current_caridnals <- find_cardinals(i, j, grid)
for (k in current_caridnals){
if (grid[k[1],k[2]] == "Meadow"){
if ((list(k) %in% meadow_list) == FALSE){
meadow_list = find_more_meadows(as.double(k[1]),as.double(k[2]), grid, meadow_list)
}
}
}
return(meadow_list)
}find_more_streams <- function(i, j, grid, stream_list){
stream_list[[length(stream_list)+1]] = as.double(c(i,j))
current_caridnals <- find_cardinals(i, j, grid)
for (k in current_caridnals){
if (grid[k[1],k[2]] == "Stream"){
if ((list(k) %in% stream_list) == FALSE){
stream_list = find_more_streams(as.double(k[1]),as.double(k[2]), grid, stream_list)
}
}
}
return(stream_list)
}score_grid <- function(grid, individual=FALSE){
score = 0
meadow_patch = list()
first_meadow = TRUE
stream_patch = list()
first_stream = TRUE
dragonfly_list = list()
deer_row <- c()
deer_col <- c()
# first_wolf = TRUE
num_wolves = 0
bear_score = 0
bee_score = 0
meadow_score = 0
trout_score = 0
eagle_score = 0
rabbit_score = 0
dragonfly_score = 0
fox_score = 0
deer_score = 0
stream_score = 0
wolf_score = 0
diversity_score = 0
for (i in 1:nrow(grid)) {
for (j in 1:ncol(grid)) {
current_caridnals <- find_cardinals(i, j, grid)
if(grid[i,j] == "Bear"){
for (k in current_caridnals){
if (grid[k[1],k[2]] == "Bee" || grid[k[1],k[2]] == "Trout"){
score = score + 2
bear_score = bear_score + 2
}
}
}
if(grid[i,j] == "Bee"){
for (k in current_caridnals){
if (grid[k[1],k[2]] == "Meadow"){
score = score + 3
bee_score = bee_score + 3
}
}
}
if(grid[i,j] == "Meadow"){
if (first_meadow == TRUE){
first_meadow = FALSE
first_patch = list()
completed_patch = find_more_meadows(as.double(i), as.double(j), grid, first_patch)
meadow_patch[[length(meadow_patch)+1]] = completed_patch
}else{
exist = FALSE
for (x in 1:length(meadow_patch)){
if (list(as.double(c(i,j))) %in% meadow_patch[[x]]){
exist = TRUE
}
}
if (exist == FALSE){
new_patch = list()
completed_patch = find_more_meadows(as.double(i), as.double(j), grid, new_patch)
meadow_patch[[length(meadow_patch)+1]] = completed_patch
}
}
}
if(grid[i,j] == "Trout"){
for (k in current_caridnals){
if (grid[k[1],k[2]] == "Dragonfly" || grid[k[1],k[2]] == "Stream"){
score = score + 2
trout_score = trout_score + 2
}
}
}
if(grid[i,j] == "Eagle"){
two_space <- find_two_spaces(i, j, grid)
for (k in two_space){
if (grid[k[1],k[2]] == "Trout" || grid[k[1],k[2]] == "Rabbit"){
score = score + 2
eagle_score = eagle_score + 2
}
}
}
if(grid[i,j] == "Rabbit"){
score = score + 1
rabbit_score = rabbit_score + 1
}
if(grid[i,j] == "Dragonfly"){
dragonfly_list[[length(dragonfly_list)+1]] = as.double(c(i,j))
}
if(grid[i,j] == "Fox"){
score_it = TRUE
for (k in current_caridnals){
if (grid[k[1],k[2]] == "Bear" || grid[k[1],k[2]] == "Wolf"){
score_it = FALSE
}
}
if (score_it){
score = score + 3
fox_score = fox_score + 3
}
}
if(grid[i,j] == "Deer"){
deer_row <- append(deer_row, i)
deer_col <- append(deer_col, j)
}
if(grid[i,j] == "Stream"){
if (first_stream == TRUE){
first_stream = FALSE
first_patch = list()
completed_patch = find_more_streams(as.double(i), as.double(j), grid, first_patch)
stream_patch[[length(stream_patch)+1]] = completed_patch
}else{
exist = FALSE
for (x in 1:length(stream_patch)){
if (list(as.double(c(i,j))) %in% stream_patch[[x]]){
exist = TRUE
}
}
if (exist == FALSE){
new_patch = list()
completed_patch = find_more_streams(as.double(i), as.double(j), grid, new_patch)
stream_patch[[length(stream_patch)+1]] = completed_patch
}
}
}
if(grid[i,j] == "Wolf"){
# temporary stand in, can only be scored properly with more than 1 player
# if (first_wolf == TRUE){
# score = score + 8
# wolf_score = wolf_score + 8
# first_wolf == FALSE
# }
num_wolves = num_wolves + 1
}
}
}
for (i in meadow_patch){
if (length(i) == 2){
score = score + 3
meadow_score = meadow_score + 3
}else if (length(i) == 3){
score = score + 6
meadow_score = meadow_score + 6
}else if (length(i) == 4){
score = score + 10
meadow_score = meadow_score + 10
}else if (length(i) >= 5){
score = score + 15
meadow_score = meadow_score + 15
}
}
largest_stream = 0
for (i in stream_patch){
if (length(i) > largest_stream){
largest_stream = length(i)
}
}
# temporary scoring for largest stream, can only be scored properly with more than 1 player
# if (largest_stream > 0){
# score = score + 5
# stream_score = stream_score + 5
# }
for (d in dragonfly_list){
current_caridnals <- find_cardinals(d[1], d[2], grid)
largest_score = 0
for (k in current_caridnals){
if (grid[k[1],k[2]] == "Stream"){
for (s in stream_patch){
if((list(k) %in% s) == TRUE){
# OLD scoring below, incorrect
# current_score = 2 * length(s)
current_score = length(s) # NEW scoring, corrected
if (current_score > largest_score){
largest_score = current_score
}
}
}
}
}
score = score + largest_score
dragonfly_score = dragonfly_score + largest_score
}
score = score + 2*length(unique(deer_row))
score = score + 2*length(unique(deer_col))
deer_score = deer_score + 2*length(unique(deer_row))
deer_score = deer_score + 2*length(unique(deer_col))
diversity_matrix <- matrix(c(bear_score, bee_score, meadow_score, trout_score, eagle_score, rabbit_score,
dragonfly_score, fox_score, deer_score))
diversity_vector <- c(bear_score, bee_score, meadow_score, trout_score, eagle_score, rabbit_score,
dragonfly_score, fox_score, deer_score)
gaps = colSums(diversity_matrix == 0)[1]
# if(gaps >= 6){
# score = score - 5
# diversity_score = -5
# }else if (gaps == 4){
# score = score + 3
# diversity_score = 3
# }else if (gaps == 3){
# score = score + 7
# diversity_score = 7
# }else if (gaps <= 2){
# score = score + 12
# diversity_score = 12
# }
if(individual==TRUE){
if(largest_stream == 0){
gaps = gaps + 1
}
if(num_wolves == 0){
gaps = gaps + 1
}
if(gaps >= 6){
dv_score = -5
}else if (gaps == 4){
dv_score = 3
}else if (gaps == 3){
dv_score = 7
}else if (gaps <= 2){
dv_score = 12
}else{
dv_score = 0
}
return(c(diversity_vector,largest_stream, num_wolves, dv_score))
}else{
return(c(score, largest_stream, num_wolves, gaps))
}
}solo_score <- function(score_vector){
score = score_vector[1]
stream_size = score_vector[2]
num_wolves = score_vector[3]
num_gaps = score_vector[4]
score = score + stream_size + num_wolves
if(stream_size == 0){
num_gaps = num_gaps + 1
}
if(num_wolves == 0){
num_gaps = num_gaps + 1
}
if(num_gaps >= 6){
score = score - 5
}else if (num_gaps == 4){
score = score + 3
}else if (num_gaps == 3){
score = score + 7
}else if (num_gaps <= 2){
score = score + 12
}
return(score)
}mp_score <- function(score_list){
# each entry in score_list follows the format: c(score, size_of_largest_stream, num_wolves, diversity_gaps)
if(length(score_list) > 2){
more_than_2 = TRUE
}else{
more_than_2 = FALSE
}
score <- rep(0, length(score_list))
stream_size <- rep(0, length(score_list))
num_wolves <- rep(0, length(score_list))
num_gaps <- rep(0, length(score_list))
for (i in 1:length(score_list)){
score[i] = score_list[[i]][1]
stream_size[i] = score_list[[i]][2]
num_wolves[i] = score_list[[i]][3]
num_gaps[i] = score_list[[i]][4] + 2
}
print(score)
stream_size = sort(stream_size, decreasing = TRUE)
num_wolves = sort(num_wolves, decreasing = TRUE)
stream_matrix = matrix(stream_size)
wolf_matrix = matrix(num_wolves)
largest_stream = stream_size[1]
second_stream = stream_size[2]
score_largest_s = TRUE
score_second_s = TRUE
if(colSums(stream_matrix == largest_stream)[1] > 1){
score_second_s = FALSE
}
for (i in 1:length(score_list)){
if (score_list[[i]][2] == largest_stream && largest_stream != 0){
score[i] = score[i] + 8
num_gaps[i] = num_gaps[i] - 1
}
}
if(score_second_s == TRUE){
for (i in 1:length(score_list)){
if (score_list[[i]][2] == second_stream && second_stream != 0){
score[i] = score[i] + 5
num_gaps[i] = num_gaps[i] - 1
}
}
}
print(score)
most_wolves = num_wolves[1]
second_wolves = num_wolves[2]
if(more_than_2){
third_wolves = num_wolves[3]
}
score_most_w = TRUE
score_second_w = TRUE
score_third_w = TRUE
if(colSums(wolf_matrix == most_wolves)[1] > 1){
score_second_w = FALSE
if(colSums(wolf_matrix == most_wolves)[1] > 2){
score_third_w = FALSE
}
}
for (i in 1:length(score_list)){
if (score_list[[i]][3] == most_wolves){
score[i] = score[i] + 12
num_gaps[i] = num_gaps[i] - 1
}
}
if(colSums(wolf_matrix == second_wolves)[1] > 1){
score_third_w = FALSE
}
if(score_second_w == TRUE){
for (i in 1:length(score_list)){
if (score_list[[i]][3] == second_wolves){
score[i] = score[i] + 8
num_gaps[i] = num_gaps[i] - 1
}
}
}
if(score_third_w == TRUE && more_than_2 == TRUE){
for (i in 1:length(score_list)){
if (score_list[[i]][3] == third_wolves){
score[i] = score[i] + 4
num_gaps[i] = num_gaps[i] - 1
}
}
}
print(score)
for(i in 1:length(score_list)){
if(num_gaps[i] >= 6){
score[i] = score[i] - 5
}else if (num_gaps[i] == 4){
score[i] = score[i] + 3
}else if (num_gaps[i] == 3){
score[i] = score[i] + 7
}else if (num_gaps[i] <= 2){
score[i] = score[i] + 12
}
}
return(score)
}baseline_sim <- function(cards, n = 10000){
all_scores <- c()
for (i in 1:n){
sim_grid <- generate_grid(cards)
all_scores <- c(all_scores, solo_score(score_grid(sim_grid)))
}
return(all_scores)
}rw_mcmc <- function(grid, iterations = 1000, acceptance_func = "simple", beta = 0, bp = 500, original = NULL, record_board = FALSE){
start_score <- solo_score(score_grid(grid))
start_grid <- grid
current_grid <- grid
continue <- TRUE
rows <- rep(1:nrow(grid))
cols <- rep(1:ncol(grid))
iter = 0
score_vector <- c()
highest_score <- start_score
highest_grid <- grid
highest_iter <- 0
while(continue){
current_score <- solo_score(score_grid(current_grid))
if(bp == 0){
}else if(iter%%bp == 0){
if(acceptance_func == "annealing dynamic" ||
acceptance_func == "delayed"){
current_grid = highest_grid
current_score = highest_score
}
}
score_vector = c(score_vector, current_score)
if(current_score > highest_score){
highest_score = current_score
highest_grid = current_grid
highest_iter = iter
}
# Choosing swap locations if grid is partially complete
if(is.null(original) == FALSE){
blanks <- which(original == "x", TRUE)
possible <- list()
for (i in 1:nrow(blanks)){
loc <- c(blanks[[i, 1]], blanks[[i, 2]])
possible[[length(possible)+1]] = loc
}
start_loc <- sample(possible, 1)
start_loc <- start_loc[[1]]
start_row <- start_loc[1]
start_col <- start_loc[2]
end_loc <- sample(possible, 1)
end_loc <- end_loc[[1]]
end_row <- end_loc[1]
end_col <- end_loc[2]
while(current_grid[start_row, start_col] == current_grid[end_row, end_col]){
end_loc <- sample(possible, 1)
end_loc <- end_loc[[1]]
end_row <- end_loc[1]
end_col <- end_loc[2]
}
# Choosing swap locations without partial grid
}else{
start_row <- sample(rows, 1)
start_col <- sample(cols, 1)
# Random swap anywhere
end_row <- sample(rows, 1)
end_col <- sample(cols, 1)
while(current_grid[start_row, start_col] == current_grid[end_row, end_col]){
end_row <- sample(rows, 1)
end_col <- sample(cols, 1)
}
}
# Adjacent swap only
# possible_end <- find_cardinals(start_row, start_col, grid)
# end <- sample(possible_end, 1)
# end_row <- end[[1]][1]
# end_col <- end[[1]][2]
proposed_grid <- current_grid
# if(iter == 347 || iter == 668){
# print(proposed_grid)
# }
start <- current_grid[start_row, start_col]
end <- current_grid[end_row, end_col]
proposed_grid[start_row, start_col] = end
proposed_grid[end_row, end_col] = start
proposed_score <- solo_score(score_grid(proposed_grid))
if(acceptance_func == "simple"){
p = proposed_score
c = current_score
if(p <= 0){
p = p + -1*p + 1
c = c + -1*p + 1
}
if(c <= 0){
c = c + -1*c + 1
p = p + -1*c + 1
}
x <- runif(1,0,1)
if(x < p/c){
current_grid <- proposed_grid
}
}
if(acceptance_func == "annealing"){
p = proposed_score
c = current_score
x <- runif(1,0,1)
if(x < exp(beta*p)/exp(beta*c)){
current_grid <- proposed_grid
}
}
if(acceptance_func == "annealing dynamic"){
p = proposed_score
c = current_score
factor = floor(iter/bp) + 1
b = beta*(factor)
x <- runif(1,0,1)
if(x < exp(b*p)/exp(b*c)){
current_grid <- proposed_grid
}
}
if(acceptance_func == "delayed"){
p = proposed_score
c = current_score
if(p <= 0){
p = p + -1*p + 1
c = c + -1*p + 1
}
if(c <= 0){
c = c + -1*c + 1
p = p + -1*c + 1
}
x <- runif(1,0,1)
if(x < p/c && p/c < 1){
p2 = proposed_score
c2 = current_score
factor = floor(iter/500) + 1
b = beta*(factor)
x <- runif(1,0,1)
if(x < (exp(b*p2)*c)/(exp(b*c2)*p)){
current_grid <- proposed_grid
}
}else if(p/c >= 1){
current_grid <- proposed_grid
}
}
iter = iter + 1
if(iter >= iterations){
continue = FALSE
}
}
final_score <- solo_score(score_grid(current_grid))
if(final_score > highest_score){
highest_score = final_score
highest_grid = grid
}
if(record_board == TRUE){
return(c(t(highest_grid), highest_score, t(start_grid), start_score))
}else{
return(c(highest_score, final_score, start_score, highest_iter, data.frame(score_vector)))
}
}multi_mcmc <- function(iterations, n, acceptance_func = "simple", beta = 0, bp = 500, grid = NULL, boardlist = NULL, record_board = FALSE, cards = NULL, card_name = NULL){
first = TRUE
start_scores <- c()
highest_scores <- c()
highest_iter <- c()
if(is.null(cards) == TRUE){
cards <- c(rep("Bear", 12),
rep("Bee", 8),
rep("Meadow", 20),
rep("Trout", 10),
rep("Eagle", 8),
rep("Rabbit", 8),
rep("Dragonfly", 8),
rep("Fox", 12),
rep("Deer", 12),
rep("Stream", 20),
rep("Wolf", 12)
)
if(is.null(card_name) == TRUE){
card_name = "default"
}
}
# Creating proper pool of cards if grid is partially complete
if(is.null(grid) == FALSE){
df <- as.data.frame(table(grid))
animals <- levels(df$grid)
for(i in 1:nrow(grid)){
animal <- animals[i]
num = length(cards[cards == animal])
cards = cards[!cards == animal]
cards <- c(cards, rep(animal, num - df[i,2]))
}
}
for(i in 1:n){
if(is.null(grid) == FALSE){
if(record_board == TRUE){
sim_grid <- generate_grid(cards, grid)
run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp, grid, record_board = TRUE)
}else{
sim_grid <- generate_grid(cards, grid)
run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp, grid)
}
}else if(is.null(boardlist) == FALSE){
if(record_board == TRUE){
run <- rw_mcmc(boardlist[[i]], iterations, acceptance_func, beta, bp, record_board = TRUE)
}else{
run <- rw_mcmc(boardlist[[i]], iterations, acceptance_func, beta, bp)
}
}else{
if(record_board == TRUE){
sim_grid <- generate_grid(cards)
run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp, record_board = TRUE)
}else{
sim_grid <- generate_grid(cards)
run <- rw_mcmc(sim_grid, iterations, acceptance_func, beta, bp)
}
}
if(record_board == TRUE){
if(first == TRUE){
first = FALSE
df <- data.frame(
row1col1 = run[[1]],
row1col2 = run[[2]],
row1col3 = run[[3]],
row1col4 = run[[4]],
row1col5 = run[[5]],
row2col1 = run[[6]],
row2col2 = run[[7]],
row2col3 = run[[8]],
row2col4 = run[[9]],
row2col5 = run[[10]],
row3col1 = run[[11]],
row3col2 = run[[12]],
row3col3 = run[[13]],
row3col4 = run[[14]],
row3col5 = run[[15]],
row4col1 = run[[16]],
row4col2 = run[[17]],
row4col3 = run[[18]],
row4col4 = run[[19]],
row4col5 = run[[20]],
score = run[[21]],
pool = card_name,
start_score = run[[42]],
row1col1_s = run[[22]],
row1col2_s = run[[23]],
row1col3_s = run[[24]],
row1col4_s = run[[25]],
row1col5_s = run[[26]],
row2col1_s = run[[27]],
row2col2_s = run[[28]],
row2col3_s = run[[29]],
row2col4_s = run[[30]],
row2col5_s = run[[31]],
row3col1_s = run[[32]],
row3col2_s = run[[33]],
row3col3_s = run[[34]],
row3col4_s = run[[35]],
row3col5_s = run[[36]],
row4col1_s = run[[37]],
row4col2_s = run[[38]],
row4col3_s = run[[39]],
row4col4_s = run[[40]],
row4col5_s = run[[41]]
)
}else{
row <- data.frame(
row1col1 = run[[1]],
row1col2 = run[[2]],
row1col3 = run[[3]],
row1col4 = run[[4]],
row1col5 = run[[5]],
row2col1 = run[[6]],
row2col2 = run[[7]],
row2col3 = run[[8]],
row2col4 = run[[9]],
row2col5 = run[[10]],
row3col1 = run[[11]],
row3col2 = run[[12]],
row3col3 = run[[13]],
row3col4 = run[[14]],
row3col5 = run[[15]],
row4col1 = run[[16]],
row4col2 = run[[17]],
row4col3 = run[[18]],
row4col4 = run[[19]],
row4col5 = run[[20]],
score = run[[21]],
pool = card_name,
start_score = run[[42]],
row1col1_s = run[[22]],
row1col2_s = run[[23]],
row1col3_s = run[[24]],
row1col4_s = run[[25]],
row1col5_s = run[[26]],
row2col1_s = run[[27]],
row2col2_s = run[[28]],
row2col3_s = run[[29]],
row2col4_s = run[[30]],
row2col5_s = run[[31]],
row3col1_s = run[[32]],
row3col2_s = run[[33]],
row3col3_s = run[[34]],
row3col4_s = run[[35]],
row3col5_s = run[[36]],
row4col1_s = run[[37]],
row4col2_s = run[[38]],
row4col3_s = run[[39]],
row4col4_s = run[[40]],
row4col5_s = run[[41]]
)
df <- rbind(df, row)
}
}else{
start_scores <- c(start_scores, run[[3]])
highest_scores <- c(highest_scores, run[[1]])
highest_iter <- c(highest_iter, run[[4]])
}
}
if(record_board == TRUE){
return(df)
}else{
return(list(start_scores, highest_scores, highest_iter))
}
}# set.seed(4)
# x1 <- multi_mcmc(2000, 100, "simple")# mean(x1[[1]])
# sd(x1[[1]])
# var(x1[[1]])
# max(x1[[1]])
# min(x1[[1]])
# summary(x1[[1]])# mean(x1[[2]])
# sd(x1[[2]])
# var(x1[[2]])
# max(x1[[2]])
# min(x1[[2]])
# summary(x1[[2]])
# summary(x1[[3]])# set.seed(4)
# x2 <- multi_mcmc(2000, 100, "annealing", 0.8, 250)# mean(x2[[1]])
# sd(x2[[1]])
# var(x2[[1]])
# max(x2[[1]])
# min(x2[[1]])
# summary(x2[[1]])# mean(x2[[2]])
# sd(x2[[2]])
# var(x2[[2]])
# max(x2[[2]])
# min(x2[[2]])
# summary(x2[[2]])
# summary(x2[[3]])# set.seed(4)
# x3 <- multi_mcmc(2000, 100, "annealing dynamic", 0.08, 250)# mean(x3[[1]])
# sd(x3[[1]])
# var(x3[[1]])
# max(x3[[1]])
# min(x3[[1]])
# summary(x3[[1]])# mean(x3[[2]])
# sd(x3[[2]])
# var(x3[[2]])
# max(x3[[2]])
# min(x3[[2]])
# summary(x3[[2]])
# summary(x3[[3]])# set.seed(4)
# x4 <- multi_mcmc(2000, 100, "annealing dynamic", 0.2, 250)# mean(x4[[1]])
# sd(x4[[1]])
# var(x4[[1]])
# max(x4[[1]])
# min(x4[[1]])
# summary(x4[[1]])# mean(x4[[2]])
# sd(x4[[2]])
# var(x4[[2]])
# max(x4[[2]])
# min(x4[[2]])
# summary(x4[[2]])# set.seed(49)
# x5 <- multi_mcmc(2000, 100, "annealing", 0.8, 250)# mean(x5[[1]])
# sd(x5[[1]])
# var(x5[[1]])
# max(x5[[1]])
# min(x5[[1]])
# summary(x5[[1]])# mean(x5[[2]])
# sd(x5[[2]])
# var(x5[[2]])
# max(x5[[2]])
# min(x5[[2]])
# summary(x5[[2]])
# summary(x5[[3]])# set.seed(49)
# x6 <- multi_mcmc(2000, 100, "delayed", 0.2, 250)# mean(x6[[1]])
# sd(x6[[1]])
# var(x6[[1]])
# max(x6[[1]])
# min(x6[[1]])
# summary(x6[[1]])# mean(x6[[2]])
# sd(x6[[2]])
# var(x6[[2]])
# max(x6[[2]])
# min(x6[[2]])
# summary(x6[[2]])
# summary(x6[[3]])# set.seed(49)
# x7 <- multi_mcmc(1000, 1000, "annealing", 0.8, 250)# mean(x7[[1]])
# sd(x7[[1]])
# var(x7[[1]])
# max(x7[[1]])
# min(x7[[1]])
# summary(x7[[1]])# mean(x7[[2]])
# sd(x7[[2]])
# var(x7[[2]])
# max(x7[[2]])
# min(x7[[2]])
# summary(x7[[2]])
# summary(x7[[3]])test_grid1 <- matrix(c("x", "Bee", "Meadow", "x", "x", "x", "Meadow", "Meadow", "x", "x",
"Deer", "Bee", "Meadow", "x", "x", "x", "Bear", "Bee", "Deer",
"x"),4,5,byrow=TRUE)
test_grid1 [,1] [,2] [,3] [,4] [,5]
[1,] "x" "Bee" "Meadow" "x" "x"
[2,] "x" "Meadow" "Meadow" "x" "x"
[3,] "Deer" "Bee" "Meadow" "x" "x"
[4,] "x" "Bear" "Bee" "Deer" "x"
# set.seed(4)
# partial1 <- multi_mcmc(1000, 100, "annealing", 0.8, 200, test_grid1)# mean(partial1[[1]])
# sd(partial1[[1]])
# var(partial1[[1]])
# max(partial1[[1]])
# min(partial1[[1]])
# summary(partial1[[1]])# mean(partial1[[2]])
# sd(partial1[[2]])
# var(partial1[[2]])
# max(partial1[[2]])
# min(partial1[[2]])
# summary(partial1[[2]])
# summary(partial1[[3]])tune <- function(iter, beta, bp, type, boardlist = NULL){
beta_df <- c()
bp_df <- c()
iter_df <- c()
start_score <- c()
highest_score <- c()
iter_at_highest <- c()
highest_score_sd <- c()
highest_iter_sd <- c()
max_highest <- c()
percentile_90 <- c()
score_65_plus <- c()
for(i in beta){
for (j in bp){
for(k in iter){
if(is.null(boardlist) == FALSE){
sim <- multi_mcmc(k, 100, type, i, j, boardlist = boardlist)
}else{
sim <- multi_mcmc(k, 100, type, i, j)
}
beta_df <- c(beta_df, i)
bp_df <- c(bp_df, j)
iter_df <- c(iter_df, k)
start_score <- c(start_score, mean(sim[[1]]))
highest_score <- c(highest_score, mean(sim[[2]]))
iter_at_highest <- c(iter_at_highest, mean(sim[[3]]))
highest_iter_sd <- c(highest_iter_sd, sd(sim[[3]]))
highest_score_sd <- c(highest_score_sd, sd(sim[[2]]))
max_highest <- c(max_highest, max(sim[[2]]))
percentile_90 <- c(percentile_90, quantile(sim[[2]],probs=0.9))
s <- sim[[2]]
score_65_plus <- c(score_65_plus, length(s[s>=65]))
}
}
}
df <- data.frame(
iterations = iter_df,
beta = beta_df,
break_point = bp_df,
mean_start_score = start_score,
mean_highest_score = highest_score,
sd_highest_score = highest_score_sd,
max_score = max_highest,
score_90th_percent = percentile_90,
score_65_plus = score_65_plus,
average_iterations = iter_at_highest,
sd_iterations = highest_iter_sd,
type = type
)
return(df)
}beta <- c(0.3,0.8,0.9)
bp <- c(125, 250)
iter <- c(500, 750)
# set.seed(4)
# tune1 <- tune(iter, beta, bp, "delayed")# tune1generate_100 <- function(pool){
board_list <- list()
for(i in 1:100){
board <- generate_grid(pool)
board_list[[length(board_list)+1]] = board
}
return(board_list)
}# set.seed(88)
set.seed(89)
board100 <- generate_100(cards)# OLD <- multi_mcmc(1000, 100, "annealing dynamic", 0.9, 200, record_board = TRUE, boardlist = board100)# NEW <- multi_mcmc(1000, 100, "annealing dynamic", 0.9, 200, record_board = TRUE, boardlist = board100)# tune_exact1 <- tune(1000, 0.99, 250, "annealing dynamic", boardlist = board100)
# tune_exact2 <- tune(1000, 0.9, 200, "annealing dynamic", boardlist = board100)
# tune_exact3 <- tune(2000, 0.7, 500, "delayed", boardlist = board100)
# tune_exact4 <- tune(2000, 0.8, 500, "annealing dynamic", boardlist = board100)
# tune_exact5 <- tune(1500, 0.9, 500, "annealing dynamic", boardlist = board100)
# tune_exact6 <- tune(750, 0.99, 250, "annealing dynamic", boardlist = board100)
# tune_exact7 <- tune(750, 0.3, 250, "delayed", boardlist = board100)
# tune_exact8 <- tune(500, 0.99, 250, "annealing dynamic", boardlist = board100)
# tune_exact9 <- tune(2000, 0.9, 250, "annealing dynamic", boardlist = board100)
# tune_exact10 <- tune(750, 0.9, 125, "annealing dynamic", boardlist = board100)
# tune_exact11 <- tune(1500, 0.9, 250, "annealing dynamic", boardlist = board100)# final_params <- rbind(tune_exact1,
# tune_exact2,
# tune_exact3,
# tune_exact4,
# tune_exact5,
# tune_exact6,
# tune_exact7,
# tune_exact8,
# tune_exact9,
# tune_exact10,
# tune_exact11)
#
# write.csv(final_params,here::here("final-parameters-CORRECTED-seed2.csv"), row.names = FALSE)# write.csv(tune1,here::here("new-parameters.csv"),row.names = FALSE)tuned_params1 <- read.csv(here::here("final-parameters-CORRECTED.csv"))tuned_params2 <- read.csv(here::here("final-parameters-CORRECTED-seed2.csv"))# new_params <- rbind(tuned_params1, tune1)
# write.csv(new_params,here::here("new-parameters.csv"), row.names = FALSE)# set.seed(45)
# sim_grid1 <- generate_grid(cards)
# x <- rw_mcmc(sim_grid1, 2000, "annealing dynamic", beta = 0.8, 250)# xz <- data.frame(iter = rep(1:length(x[[4]])), scores = x[[4]])
# ggplot(aes(x = iter, y = scores), data = xz) +
# geom_line()cards <- c(rep("Bear", 12),
rep("Bee", 8),
rep("Meadow", 20),
rep("Trout", 10),
rep("Eagle", 8),
rep("Rabbit", 8),
rep("Dragonfly", 8),
rep("Fox", 12),
rep("Deer", 12),
rep("Stream", 20),
rep("Wolf", 12)
)
dfly_stream <- c(rep("Bear", 6),
rep("Bee", 4),
rep("Meadow", 10),
rep("Trout", 5),
rep("Eagle", 4),
rep("Rabbit", 4),
rep("Dragonfly", 8),
rep("Fox", 6),
rep("Deer", 6),
rep("Stream", 20),
rep("Wolf", 6)
)
bee_meadow <- c(rep("Bear", 6),
rep("Bee", 8),
rep("Meadow", 20),
rep("Trout", 5),
rep("Eagle", 4),
rep("Rabbit", 4),
rep("Dragonfly", 4),
rep("Fox", 6),
rep("Deer", 6),
rep("Stream", 10),
rep("Wolf", 6)
)
low_eag_rab <- c(rep("Bear", 12),
rep("Bee", 8),
rep("Meadow", 20),
rep("Trout", 10),
rep("Eagle", 2),
rep("Rabbit", 2),
rep("Dragonfly", 8),
rep("Fox", 12),
rep("Deer", 12),
rep("Stream", 20),
rep("Wolf", 12)
)# database_INCORRECT <- read.csv(here::here("database.csv"))
# database_INCORRECT <- database_INCORRECT %>%
# arrange(pool)# dist <- database_INCORRECT %>%
# group_by(pool) %>%
# summarize(count = n())
# dist# ID_NEW <- database_INCORRECT %>%
# select(ID)
#
# database_INCORRECT <- database_INCORRECT %>%
# select(2:21)shuffle <- function(df) {
shuffled_df <- as.data.frame(t(apply(df, 1, sample)))
colnames(shuffled_df) <- colnames(df)
if (!is.null(rownames(df))) {
rownames(shuffled_df) <- rownames(df)
}
return(shuffled_df)
}# set.seed(88)
# shuffled_incorrect <- shuffle(database_INCORRECT)# write.csv(shuffled_incorrect, here::here("shuffled_database.csv"), row.names = FALSE)# shuffled_incorrect <- read.csv(here::here("shuffled_database.csv"))df_to_matrix_list <- function(df) {
if (ncol(df) != 20) {
stop("Dataframe must have exactly 20 columns")
}
matrix_list <- lapply(1:nrow(df), function(i) {
matrix(as.character(df[i, ]), # Keep as character
nrow = 4,
ncol = 5,
byrow = TRUE,
dimnames = list(NULL, paste0("col", 1:5))) # Optional column names
})
if (!is.null(rownames(df))) {
names(matrix_list) <- rownames(df)
}
return(matrix_list)
}# shuffled_grid_list <- df_to_matrix_list(shuffled_incorrect)# startTime <- Sys.time()
#
# db_gen <- multi_mcmc(1000, 100, "annealing dynamic", 0.9, 200, record_board = TRUE,
# boardlist=shuffled_grid_list[names(shuffled_grid_list)[1:100]],
# card_name = "bee_meadow")
# endTime <- Sys.time()
# print(endTime - startTime)
#
# write.csv(db_gen, here::here("database-CORRECTED.csv"), row.names = FALSE)# startTime <- Sys.time()
#
# db_gen <- multi_mcmc(1000, 4000, "annealing dynamic", 0.9, 200, record_board = TRUE,
# boardlist=shuffled_grid_list[names(shuffled_grid_list)[46001:50000]],
# card_name = "low_eagle_rabbit")
# endTime <- Sys.time()
# print(endTime - startTime)
#
# database_old <- read.csv(here::here("database-CORRECTED.csv"))
# database_new <- rbind(database_old, db_gen)
# database_new <- database_new %>% distinct()
# write.csv(database_new, here::here("database-CORRECTED.csv"), row.names = FALSE)# head(db_gen)database_new <- read.csv(here::here("database-CORRECTED.csv"))dist2 <- database_new %>%
group_by(pool) %>%
summarize(count = n())
dist2# A tibble: 4 × 2
pool count
<chr> <int>
1 bee_meadow 6000
2 default 30000
3 dragonfly_stream 6000
4 low_eagle_rabbit 8000
# Add the original IDs back
# database_new_ID <- database_new %>%
# mutate(ID = ID_NEW$ID) %>%
# select(ID, 1:43) %>%
# arrange(ID)
# write.csv(database_new_ID, here::here("database-CORRECTED.csv"), row.names = FALSE)database_new %>%
mutate(score = as.numeric(score)) %>%
filter(pool == "default") %>%
ggplot(aes(x = score)) +
geom_histogram(binwidth = 3, fill = "steelblue", color = "black")database_new %>%
mutate(score = as.numeric(score)) %>%
filter(pool == "dragonfly_stream") %>%
ggplot(aes(x = score)) +
geom_histogram(binwidth = 3, fill = "steelblue", color = "black")database_new %>%
mutate(score = as.numeric(score)) %>%
filter(pool == "bee_meadow") %>%
ggplot(aes(x = score)) +
geom_histogram(binwidth = 3, fill = "steelblue", color = "black")database_new %>%
mutate(score = as.numeric(score)) %>%
filter(pool == "low_eagle_rabbit") %>%
ggplot(aes(x = score)) +
geom_histogram(binwidth = 3, fill = "steelblue", color = "black")# database_new <- read.csv(here::here("database.csv"))
#
# database_new_ID <- database_new %>%
# mutate(ID = rep(1:50000)) %>%
# select(ID, 1:22)
#
# write.csv(database_new_ID, here::here("database.csv"), row.names = FALSE)# database <- read.csv(here::here("database-CORRECTED.csv"))# database_new <- database %>%
# select(1:23)
#
# write.csv(database_new, here::here("database-CORRECTED-a.csv"), row.names = FALSE)# database_start <- database %>%
# select(1,25:44,24)
#
# write.csv(database_start, here::here("database-CORRECTED-s.csv"), row.names = FALSE)database_new <- read.csv(here::here("database-CORRECTED-a.csv"))grids <- database_new %>% select(-c(ID, pool))
dmy <- dummyVars(" ~ .", data = grids)
grids <- data.frame(predict(dmy, newdata = grids))
grids_matrix <- as.matrix(grids)set.seed(4)
km_spec1 <- k_means(num_clusters = 4)
grids_recipe <- recipe(~., data = grids_matrix)
km_wflow1 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec1)
km_fitted1 <- km_wflow1 |> fit(grids_matrix)
km_fitted1 |> extract_centroids()# A tibble: 4 × 222
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0482 0.0403 0.137 0.0559 0.0353
2 Cluster… 0.0647 0.0284 0.125 0.0532 0.0325
3 Cluster… 0.0994 0.0198 0.111 0.0488 0.0344
4 Cluster… 0.158 0.0131 0.0968 0.0442 0.0539
# ℹ 216 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
grids_km1 <- kmeans(grids_matrix, centers = 4)
grids_km1$totss[1] 4363055
grids_km1$withinss[1] 318486.3 196194.0 458591.5 371015.6
grids_km1$betweenss[1] 3018768
x <- km_fitted1 |> extract_centroids()
x# A tibble: 4 × 222
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0482 0.0403 0.137 0.0559 0.0353
2 Cluster… 0.0647 0.0284 0.125 0.0532 0.0325
3 Cluster… 0.0994 0.0198 0.111 0.0488 0.0344
4 Cluster… 0.158 0.0131 0.0968 0.0442 0.0539
# ℹ 216 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
pc <- prcomp(grids_matrix)cumul_vars <- cumsum(pc$sdev^2)/sum(pc$sdev^2)
cumul_vars [1] 0.7990713 0.8085696 0.8151198 0.8208002 0.8246353 0.8282929 0.8318508
[8] 0.8350357 0.8379635 0.8407963 0.8435440 0.8462028 0.8487573 0.8512485
[15] 0.8537064 0.8560526 0.8583538 0.8605829 0.8627621 0.8649096 0.8669842
[22] 0.8690298 0.8710401 0.8730306 0.8749617 0.8768473 0.8787150 0.8805724
[29] 0.8822680 0.8839363 0.8855721 0.8871926 0.8887514 0.8902934 0.8917863
[36] 0.8932713 0.8947479 0.8961980 0.8975581 0.8989054 0.9002266 0.9015448
[43] 0.9028343 0.9041140 0.9053820 0.9066372 0.9078713 0.9090997 0.9103152
[50] 0.9115020 0.9126733 0.9138378 0.9149813 0.9161163 0.9172431 0.9183652
[57] 0.9194728 0.9205711 0.9216547 0.9227280 0.9237937 0.9248461 0.9258916
[64] 0.9269296 0.9279460 0.9289554 0.9299574 0.9309509 0.9319060 0.9328501
[71] 0.9337880 0.9347152 0.9356375 0.9365395 0.9374277 0.9383124 0.9391930
[78] 0.9400593 0.9409234 0.9417758 0.9426169 0.9434524 0.9442858 0.9451073
[85] 0.9459235 0.9467280 0.9475270 0.9483193 0.9491100 0.9498936 0.9506737
[92] 0.9514378 0.9521992 0.9529520 0.9536946 0.9544332 0.9551676 0.9558978
[99] 0.9566178 0.9573257 0.9580284 0.9587258 0.9594118 0.9600901 0.9607608
[106] 0.9614246 0.9620849 0.9627360 0.9633820 0.9640142 0.9646441 0.9652636
[113] 0.9658721 0.9664730 0.9670685 0.9676560 0.9682394 0.9688168 0.9693920
[120] 0.9699625 0.9705296 0.9710928 0.9716514 0.9722043 0.9727534 0.9733000
[127] 0.9738434 0.9743822 0.9749177 0.9754473 0.9759608 0.9764647 0.9769677
[134] 0.9774678 0.9779626 0.9784514 0.9789357 0.9794191 0.9798949 0.9803567
[141] 0.9808163 0.9812710 0.9817239 0.9821709 0.9826135 0.9830502 0.9834828
[148] 0.9839049 0.9843214 0.9847349 0.9851440 0.9855439 0.9859416 0.9863376
[155] 0.9867263 0.9871117 0.9874910 0.9878659 0.9882330 0.9885971 0.9889546
[162] 0.9893095 0.9896598 0.9900045 0.9903462 0.9906723 0.9909957 0.9913162
[169] 0.9916327 0.9919474 0.9922569 0.9925661 0.9928735 0.9931770 0.9934799
[176] 0.9937808 0.9940796 0.9943759 0.9946708 0.9949610 0.9952431 0.9955192
[183] 0.9957940 0.9960661 0.9963357 0.9966025 0.9968679 0.9971311 0.9973906
[190] 0.9976455 0.9978960 0.9981376 0.9983756 0.9986124 0.9988470 0.9990717
[197] 0.9992804 0.9994651 0.9996469 0.9998269 1.0000000 1.0000000 1.0000000
[204] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
[211] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
[218] 1.0000000 1.0000000 1.0000000 1.0000000
grids_reduced <- pc$x[, 1:8]
grids_pca_km <- kmeans(grids_reduced, 4)
grids_pca_km$totss[1] 3643307
grids_pca_km$withinss[1] 186732.3 113977.2 127585.7 174209.0
grids_pca_km$betweenss[1] 3040803
pc$rotation[,1:2] PC1 PC2
row1col1Bear 3.525471e-03 1.208440e-02
row1col1Bee -1.229508e-03 2.847542e-02
row1col1Deer -1.563141e-03 6.438670e-03
row1col1Dragonfly -3.087862e-04 -4.493300e-02
row1col1Eagle 2.816748e-04 -1.600740e-02
row1col1Fox 1.203340e-04 -4.565968e-03
row1col1Meadow -1.743163e-03 1.175701e-01
row1col1Rabbit -7.911279e-04 -8.263966e-03
row1col1Stream -5.985244e-04 -9.325707e-02
row1col1Trout -1.094325e-03 -1.417474e-02
row1col1Wolf 3.401097e-03 1.663354e-02
row1col2Bear 2.230156e-03 8.241668e-03
row1col2Bee -1.749250e-03 4.157861e-02
row1col2Deer -4.099825e-04 1.430501e-02
row1col2Dragonfly 7.485636e-05 -3.830137e-02
row1col2Eagle -6.510955e-04 -2.376168e-02
row1col2Fox 6.797353e-04 4.522287e-03
row1col2Meadow -1.692704e-03 1.273517e-01
row1col2Rabbit -4.789587e-04 6.355118e-03
row1col2Stream 1.589351e-03 -1.176590e-01
row1col2Trout -2.403127e-03 -3.218066e-02
row1col2Wolf 2.811020e-03 9.548389e-03
row1col3Bear 1.692085e-03 -1.994705e-04
row1col3Bee -1.895569e-03 1.638868e-03
row1col3Deer 3.658011e-04 2.325672e-03
row1col3Dragonfly -9.823765e-06 -2.775846e-03
row1col3Eagle -1.099400e-03 -4.714311e-04
row1col3Fox 9.373601e-04 -3.936103e-04
row1col3Meadow -1.803892e-03 3.662032e-03
row1col3Rabbit -6.802968e-04 1.281781e-03
row1col3Stream 2.526429e-03 -5.500242e-03
row1col3Trout -2.856538e-03 -8.311542e-04
row1col3Wolf 2.823844e-03 1.263401e-03
row1col4Bear 2.365416e-03 -5.615433e-03
row1col4Bee -1.697269e-03 -3.939167e-02
row1col4Deer -5.800104e-04 -1.056053e-02
row1col4Dragonfly 5.442077e-05 3.708769e-02
row1col4Eagle -6.695388e-04 2.251931e-02
row1col4Fox 9.502406e-04 -3.137602e-03
row1col4Meadow -2.057133e-03 -1.243571e-01
row1col4Rabbit -4.448784e-04 -3.930111e-03
row1col4Stream 1.585734e-03 1.103649e-01
row1col4Trout -2.303197e-03 3.042570e-02
row1col4Wolf 2.796215e-03 -1.340519e-02
row1col5Bear 3.306183e-03 -1.082415e-02
row1col5Bee -1.291068e-03 -2.647484e-02
row1col5Deer -1.652200e-03 -8.678304e-03
row1col5Dragonfly -2.482084e-04 4.608562e-02
row1col5Eagle 1.513419e-04 1.596037e-02
row1col5Fox 2.521500e-04 6.025699e-04
row1col5Meadow -1.891161e-03 -1.149612e-01
row1col5Rabbit -8.049679e-04 8.140825e-03
row1col5Stream -3.730683e-04 8.427917e-02
row1col5Trout -9.794445e-04 1.492748e-02
row1col5Wolf 3.530443e-03 -9.057571e-03
row2col1Bear 2.706502e-03 7.159172e-03
row2col1Bee -1.838939e-03 6.279956e-02
row2col1Deer 2.658413e-04 8.714832e-03
row2col1Dragonfly 4.202294e-06 -5.758599e-02
row2col1Eagle -7.078046e-04 -2.305346e-02
row2col1Fox 8.849161e-04 1.046862e-04
row2col1Meadow -3.225206e-03 2.295829e-01
row2col1Rabbit -1.196575e-04 -4.273469e-03
row2col1Stream 9.758095e-04 -1.783810e-01
row2col1Trout -2.431067e-03 -5.204120e-02
row2col1Wolf 3.485403e-03 6.973955e-03
row2col2Bear 6.318311e-04 9.754261e-03
row2col2Bee -3.254568e-03 1.064106e-01
row2col2Deer 6.514257e-04 6.540492e-03
row2col2Dragonfly 3.439755e-04 -3.858544e-02
row2col2Eagle -1.309748e-03 -4.043001e-03
row2col2Fox 9.031619e-04 4.278362e-03
row2col2Meadow -1.831383e-03 2.016166e-01
row2col2Rabbit 3.424934e-04 1.757391e-03
row2col2Stream 5.653596e-03 -1.870163e-01
row2col2Trout -3.825911e-03 -1.058061e-01
row2col2Wolf 1.695126e-03 5.093176e-03
row2col3Bear -1.357112e-03 3.820824e-03
row2col3Bee -3.407352e-03 -3.877209e-05
row2col3Deer 8.141742e-04 -6.092493e-04
row2col3Dragonfly 8.229033e-04 -6.379700e-04
row2col3Eagle -1.935517e-03 5.162879e-04
row2col3Fox 8.657173e-04 4.248977e-04
row2col3Meadow -1.361598e-03 2.622730e-03
row2col3Rabbit 4.002840e-04 8.882283e-04
row2col3Stream 7.389636e-03 -2.156863e-03
row2col3Trout -3.840094e-03 -4.711572e-03
row2col3Wolf 1.608960e-03 -1.185411e-04
row2col4Bear 3.132284e-04 -1.220364e-02
row2col4Bee -3.446916e-03 -1.007165e-01
row2col4Deer 8.519385e-04 -6.969672e-03
row2col4Dragonfly 3.758565e-04 3.566678e-02
row2col4Eagle -1.356372e-03 5.270959e-03
row2col4Fox 1.023162e-03 -4.198022e-03
row2col4Meadow -1.841906e-03 -1.999834e-01
row2col4Rabbit 4.245672e-04 -1.213776e-03
row2col4Stream 5.593015e-03 1.811398e-01
row2col4Trout -3.819017e-03 1.079895e-01
row2col4Wolf 1.882444e-03 -4.782140e-03
row2col5Bear 2.621287e-03 -6.284689e-03
row2col5Bee -1.882357e-03 -6.418720e-02
row2col5Deer 4.777415e-04 -8.721750e-03
row2col5Dragonfly -2.887361e-04 5.823075e-02
row2col5Eagle -4.074980e-04 2.091108e-02
row2col5Fox 1.017649e-03 -4.306659e-03
row2col5Meadow -3.389228e-03 -2.198540e-01
row2col5Rabbit -1.236417e-04 4.315750e-03
row2col5Stream 1.049571e-03 1.751737e-01
row2col5Trout -2.453641e-03 5.029976e-02
row2col5Wolf 3.378854e-03 -5.576761e-03
row3col1Bear 3.021285e-03 6.968097e-03
row3col1Bee -1.844641e-03 6.236495e-02
row3col1Deer -6.548834e-05 4.560889e-03
row3col1Dragonfly -1.132292e-04 -5.763261e-02
row3col1Eagle -4.896807e-04 -2.211167e-02
row3col1Fox 1.135382e-03 1.901011e-03
row3col1Meadow -3.336063e-03 2.266424e-01
row3col1Rabbit -2.577588e-04 -3.049764e-03
row3col1Stream 1.147342e-03 -1.775896e-01
row3col1Trout -2.451324e-03 -4.997501e-02
row3col1Wolf 3.254177e-03 7.921327e-03
row3col2Bear 5.290658e-04 9.130014e-03
row3col2Bee -3.590559e-03 1.046187e-01
row3col2Deer 8.097848e-04 6.023265e-03
row3col2Dragonfly 4.689507e-04 -3.495873e-02
row3col2Eagle -1.373730e-03 -4.809535e-03
row3col2Fox 8.928715e-04 4.596970e-03
row3col2Meadow -1.779388e-03 1.977747e-01
row3col2Rabbit 3.249929e-04 1.881293e-03
row3col2Stream 5.416676e-03 -1.820271e-01
row3col2Trout -3.600518e-03 -1.079460e-01
row3col2Wolf 1.901853e-03 5.716463e-03
row3col3Bear -1.323309e-03 -3.366134e-03
row3col3Bee -3.391096e-03 5.036990e-04
row3col3Deer 8.861450e-04 5.779024e-04
row3col3Dragonfly 6.810407e-04 -9.540905e-04
row3col3Eagle -1.882615e-03 8.591630e-04
row3col3Fox 8.590569e-04 -9.611547e-04
row3col3Meadow -1.433720e-03 -2.669119e-03
row3col3Rabbit 2.900880e-04 1.265972e-04
row3col3Stream 7.324821e-03 4.413155e-03
row3col3Trout -3.725313e-03 1.426930e-03
row3col3Wolf 1.714900e-03 4.305197e-05
row3col4Bear 7.439585e-04 -8.508973e-03
row3col4Bee -3.702024e-03 -1.047104e-01
row3col4Deer 8.095716e-04 -7.273598e-03
row3col4Dragonfly 1.990299e-04 3.849704e-02
row3col4Eagle -9.602989e-04 4.975071e-03
row3col4Fox 8.586065e-04 -4.026960e-03
row3col4Meadow -2.095723e-03 -2.018740e-01
row3col4Rabbit 4.226133e-04 -2.878118e-03
row3col4Stream 5.606404e-03 1.869644e-01
row3col4Trout -3.727345e-03 1.050996e-01
row3col4Wolf 1.845206e-03 -6.264071e-03
row3col5Bear 2.507376e-03 -8.044082e-03
row3col5Bee -1.792411e-03 -6.145913e-02
row3col5Deer -2.027287e-05 -7.788495e-03
row3col5Dragonfly 1.770525e-06 5.697001e-02
row3col5Eagle -3.208647e-04 2.078623e-02
row3col5Fox 1.300581e-03 -1.069027e-03
row3col5Meadow -3.776937e-03 -2.268463e-01
row3col5Rabbit 7.485456e-06 2.727875e-03
row3col5Stream 1.143120e-03 1.790783e-01
row3col5Trout -2.488087e-03 5.351694e-02
row3col5Wolf 3.438240e-03 -7.872314e-03
row4col1Bear 3.299139e-03 1.113669e-02
row4col1Bee -1.304996e-03 2.835685e-02
row4col1Deer -1.478687e-03 9.310936e-03
row4col1Dragonfly -2.261399e-04 -4.420473e-02
row4col1Eagle 3.721683e-04 -1.546505e-02
row4col1Fox -1.073184e-04 -4.167611e-03
row4col1Meadow -1.800250e-03 1.142681e-01
row4col1Rabbit -4.715558e-04 -1.117283e-02
row4col1Stream -2.436098e-04 -8.676783e-02
row4col1Trout -1.028451e-03 -1.397925e-02
row4col1Wolf 2.989700e-03 1.268477e-02
row4col2Bear 2.687398e-03 8.091017e-03
row4col2Bee -1.667118e-03 3.896600e-02
row4col2Deer -4.883495e-04 9.704055e-03
row4col2Dragonfly 1.255462e-04 -3.683669e-02
row4col2Eagle -7.586840e-04 -2.241905e-02
row4col2Fox 4.013194e-04 3.350299e-03
row4col2Meadow -2.023243e-03 1.264752e-01
row4col2Rabbit -5.476238e-04 4.390553e-03
row4col2Stream 1.591733e-03 -1.091139e-01
row4col2Trout -2.332282e-03 -2.868451e-02
row4col2Wolf 3.011304e-03 6.076988e-03
row4col3Bear 1.860172e-03 -3.426441e-03
row4col3Bee -1.874538e-03 -1.026529e-03
row4col3Deer 3.319346e-04 -5.533426e-04
row4col3Dragonfly -6.885821e-05 2.031192e-03
row4col3Eagle -7.901326e-04 -4.173972e-04
row4col3Fox 8.595626e-04 1.367720e-03
row4col3Meadow -1.962354e-03 -1.896016e-03
row4col3Rabbit -7.221047e-04 -2.335551e-04
row4col3Stream 2.521984e-03 5.623811e-03
row4col3Trout -2.702473e-03 1.131848e-03
row4col3Wolf 2.546807e-03 -2.601291e-03
row4col4Bear 2.351488e-03 -7.469977e-03
row4col4Bee -1.783549e-03 -4.193343e-02
row4col4Deer -2.845689e-04 -1.179528e-02
row4col4Dragonfly 2.056544e-05 3.858043e-02
row4col4Eagle -5.826719e-04 2.134230e-02
row4col4Fox 7.637383e-04 -6.255920e-03
row4col4Meadow -2.166699e-03 -1.303242e-01
row4col4Rabbit -5.412153e-04 -4.560138e-03
row4col4Stream 1.611892e-03 1.154778e-01
row4col4Trout -2.427926e-03 3.594741e-02
row4col4Wolf 3.038946e-03 -9.008968e-03
row4col5Bear 3.396172e-03 -1.255136e-02
row4col5Bee -1.389434e-03 -2.830883e-02
row4col5Deer -1.828637e-03 -8.506739e-03
row4col5Dragonfly -2.623748e-04 4.728809e-02
row4col5Eagle 2.895924e-04 1.659036e-02
row4col5Fox -3.302766e-05 2.067047e-03
row4col5Meadow -1.872072e-03 -1.179045e-01
row4col5Rabbit -2.639154e-04 1.093532e-02
row4col5Stream -3.977629e-04 8.842771e-02
row4col5Trout -1.018393e-03 1.319532e-02
row4col5Wolf 3.379853e-03 -1.123246e-02
score -9.995262e-01 3.171744e-04
# No lowest
grids <- database_new %>%
filter(score > 57) %>%
select(-c(ID, pool))
dmy <- dummyVars(" ~ .", data = grids)
grids <- data.frame(predict(dmy, newdata = grids))grids_noscore <- grids %>%
select(-c(score))
noscore_matrix <- as.matrix(grids_noscore)# write.csv(grids_noscore, here::here("grids_noscore-CORRECTED.csv"), row.names = FALSE)set.seed(4)
km_spec2 <- k_means(num_clusters = 3)
grids_recipe <- recipe(~., data = noscore_matrix)
km_wflow2 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec2)
km_fitted2 <- km_wflow2 |> fit(noscore_matrix)
km_fitted2 |> extract_centroids()# A tibble: 3 × 221
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0494 0.00369 0.127 0.0927 0.0518
2 Cluster… 0.0605 0.102 0.0848 0.00277 0.00682
3 Cluster… 0.0656 0.0153 0.184 0.0437 0.0314
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
grids_km2 <- kmeans(noscore_matrix, centers = 3)
grids_km2$totss[1] 646241.3
grids_km2$withinss[1] 164572.4 283239.0 162041.1
grids_km2$betweenss[1] 36388.78
set.seed(4)
km_spec2 <- k_means(num_clusters = 4)
grids_recipe <- recipe(~., data = noscore_matrix)
km_wflow2 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec2)
km_fitted2 <- km_wflow2 |> fit(noscore_matrix)
km_fitted2 |> extract_centroids()# A tibble: 4 × 221
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0450 0.00143 0.0818 0.122 0.0624
2 Cluster… 0.0905 0.0450 0.124 0.0183 0.0102
3 Cluster… 0.0368 0.0818 0.142 0.0152 0.0199
4 Cluster… 0.0573 0.0129 0.179 0.0539 0.0389
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
grids_km2 <- kmeans(noscore_matrix, centers = 4)
grids_km2$totss[1] 646241.3
grids_km2$withinss[1] 145872.1 144632.4 153272.9 154573.8
grids_km2$betweenss[1] 47890.06
set.seed(4)
km_spec2 <- k_means(num_clusters = 5)
grids_recipe <- recipe(~., data = noscore_matrix)
km_wflow2 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec2)
km_fitted2 <- km_wflow2 |> fit(noscore_matrix)
km_fitted2 |> extract_centroids()# A tibble: 5 × 221
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0452 0.00187 0.0774 0.121 0.0643
2 Cluster… 0.0915 0.0670 0.0873 0.00583 0.00636
3 Cluster… 0.0279 0.126 0.101 0.00438 0.00666
4 Cluster… 0.0687 0.00912 0.192 0.0451 0.0307
5 Cluster… 0.0556 0.00726 0.183 0.0574 0.0403
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
grids_km2 <- kmeans(noscore_matrix, centers = 5)
grids_km2$totss[1] 646241.3
grids_km2$withinss[1] 87063.98 140905.89 123631.30 143523.59 95218.65
grids_km2$betweenss[1] 55897.84
set.seed(4)
km_spec2 <- k_means(num_clusters = 6)
grids_recipe <- recipe(~., data = noscore_matrix)
km_wflow2 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec2)
km_fitted2 <- km_wflow2 |> fit(noscore_matrix)
km_fitted2 |> extract_centroids()# A tibble: 6 × 221
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0348 0.00278 0.0728 0.112 0.0869
2 Cluster… 0.0960 0.0721 0.0857 0.00465 0.00733
3 Cluster… 0.0235 0.124 0.111 0.00491 0.00655
4 Cluster… 0.0731 0.00710 0.189 0.0462 0.0281
5 Cluster… 0.0757 0.00208 0.127 0.100 0.0121
6 Cluster… 0.0358 0.0152 0.201 0.0424 0.0541
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
grids_km2 <- kmeans(noscore_matrix, centers = 6)
grids_km2$totss[1] 646241.3
grids_km2$withinss[1] 107160.59 84558.43 102995.44 85750.57 104087.50 105745.73
grids_km2$betweenss[1] 55942.98
set.seed(4)
km_spec2 <- k_means(num_clusters = 7)
grids_recipe <- recipe(~., data = noscore_matrix)
km_wflow2 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec2)
km_fitted2 <- km_wflow2 |> fit(noscore_matrix)
km_fitted2 |> extract_centroids()# A tibble: 7 × 221
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0183 0.00381 0.0782 0.0816 0.110
2 Cluster… 0.0954 0.0714 0.0853 0.00475 0.00694
3 Cluster… 0.0234 0.129 0.106 0.00449 0.00636
4 Cluster… 0.0737 0.00746 0.192 0.0450 0.0281
5 Cluster… 0.0666 0.00120 0.0899 0.153 0.0185
6 Cluster… 0.0796 0.00485 0.152 0.0741 0.0221
7 Cluster… 0.0333 0.0149 0.200 0.0434 0.0469
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
grids_km2 <- kmeans(noscore_matrix, centers = 7)
grids_km2$totss[1] 646241.3
grids_km2$withinss[1] 65988.46 78541.83 88200.81 81343.84 84341.52 65373.08 114440.56
grids_km2$betweenss[1] 68011.17
set.seed(4)
km_spec2 <- k_means(num_clusters = 8)
grids_recipe <- recipe(~., data = noscore_matrix)
km_wflow2 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec2)
km_fitted2 <- km_wflow2 |> fit(noscore_matrix)
km_fitted2 |> extract_centroids()# A tibble: 8 × 221
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0188 0.00332 0.0760 0.0846 0.110
2 Cluster… 0.0983 0.0730 0.0840 0.00492 0.00846
3 Cluster… 0.0152 0.150 0.0706 0.00199 0.00398
4 Cluster… 0.101 0.00809 0.165 0.0468 0.0134
5 Cluster… 0.0670 0.00121 0.0910 0.152 0.0192
6 Cluster… 0.0798 0.00406 0.152 0.0754 0.0218
7 Cluster… 0.0395 0.0297 0.209 0.0285 0.0330
8 Cluster… 0.0340 0.0113 0.201 0.0446 0.0491
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
grids_km2 <- kmeans(noscore_matrix, centers = 8)
grids_km2$totss[1] 646241.3
grids_km2$withinss[1] 54845.88 75562.18 76206.71 107942.76 56648.50 77665.17 64363.53
[8] 61319.18
grids_km2$betweenss[1] 71687.35
km_fitted2 |> extract_centroids()# A tibble: 8 × 221
.cluster row1col1Bear row1col1Bee row1col1Deer row1col1Dragonfly row1col1Eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster… 0.0188 0.00332 0.0760 0.0846 0.110
2 Cluster… 0.0983 0.0730 0.0840 0.00492 0.00846
3 Cluster… 0.0152 0.150 0.0706 0.00199 0.00398
4 Cluster… 0.101 0.00809 0.165 0.0468 0.0134
5 Cluster… 0.0670 0.00121 0.0910 0.152 0.0192
6 Cluster… 0.0798 0.00406 0.152 0.0754 0.0218
7 Cluster… 0.0395 0.0297 0.209 0.0285 0.0330
8 Cluster… 0.0340 0.0113 0.201 0.0446 0.0491
# ℹ 215 more variables: row1col1Fox <dbl>, row1col1Meadow <dbl>,
# row1col1Rabbit <dbl>, row1col1Stream <dbl>, row1col1Trout <dbl>,
# row1col1Wolf <dbl>, row1col2Bear <dbl>, row1col2Bee <dbl>,
# row1col2Deer <dbl>, row1col2Dragonfly <dbl>, row1col2Eagle <dbl>,
# row1col2Fox <dbl>, row1col2Meadow <dbl>, row1col2Rabbit <dbl>,
# row1col2Stream <dbl>, row1col2Trout <dbl>, row1col2Wolf <dbl>,
# row1col3Bear <dbl>, row1col3Bee <dbl>, row1col3Deer <dbl>, …
km_fitted2 %>% extract_cluster_assignment()# A tibble: 36,708 × 1
.cluster
<fct>
1 Cluster_1
2 Cluster_2
3 Cluster_3
4 Cluster_3
5 Cluster_1
6 Cluster_4
7 Cluster_5
8 Cluster_4
9 Cluster_5
10 Cluster_3
# ℹ 36,698 more rows
database_new %>%
summarize(mean_score=mean(score),
sd_score=sd(score),
min = min(score),
p25th=quantile(score,probs=0.25),
median=quantile(score,probs=0.5),
p75th=quantile(score,probs=0.75),
max=max(score),
count=n()) mean_score sd_score min p25th median p75th max count
1 61.67342 8.346453 17 57 63 67 88 50000
database_lowest_removed <- database_new %>%
filter(score > 57)database1 <- database_lowest_removed %>%
mutate(`7cluster` = extract_cluster_assignment(km_fitted2)$.cluster)summarized <- database1 %>%
group_by(`7cluster`) %>%
summarise(mean_score=mean(score),
sd_score=sd(score),
.groups = 'drop')
summarized# A tibble: 8 × 3
`7cluster` mean_score sd_score
<fct> <dbl> <dbl>
1 Cluster_1 65.5 4.96
2 Cluster_2 65.6 4.97
3 Cluster_3 65.6 5.00
4 Cluster_4 65.4 4.83
5 Cluster_5 65.6 4.90
6 Cluster_6 65.6 4.99
7 Cluster_7 65.6 4.99
8 Cluster_8 65.5 4.92
# write.csv(summarized, here::here("score_removed_c_summary.csv"), row.names = FALSE)# database1 %>%
# group_by(cluster) %>%
# summarise(mean_score=mean(score),
# sd_score=sd(score),
# count=n(),
# .groups = 'drop') # database1 %>%
# group_by(cluster) %>%
# summarise(mean_score=mean(score),
# sd_score=sd(score),
# count=n(),
# .groups = 'drop') summary(database_new$score) Min. 1st Qu. Median Mean 3rd Qu. Max.
17.00 57.00 63.00 61.67 67.00 88.00
database_new3 <- database_new %>%
filter(score > 57) %>%
select(-c(ID,pool,score))
ID_cols <- database_new %>%
filter(score > 57) %>%
select(ID)# Run once
# first_row <- score_grid(matrix(c(t(database_new3[1,])),nrow=4,ncol=5,byrow=T), individual=TRUE)
#
# db_individual <- data.frame(
# bear_score = c(first_row[1]),
# bee_score = c(first_row[2]),
# meadow_score = c(first_row[3]),
# trout_score = c(first_row[4]),
# eagle_score = c(first_row[5]),
# rabbit_score = c(first_row[6]),
# dragonfly_score = c(first_row[7]),
# fox_score = c(first_row[8]),
# deer_score = c(first_row[9]),
# stream_score = c(first_row[10]),
# wolves_score = c(first_row[11]),
# dv_score = c(first_row[12])
# )
#
# for(i in 2:nrow(database_new3)){
# row <- as.list(score_grid(matrix(c(t(database_new3[i,])),nrow=4,ncol=5,byrow=T), individual=TRUE))
# db_individual <- rbind(db_individual, row)
# }# db_individual_ID <- db_individual %>%
# mutate(ID = ID_cols$ID) %>%
# select(ID, 1:12)
#
# write.csv(db_individual_ID, here::here("db_individual_lowest_removed-CORRECTED.csv"), row.names = FALSE)db_individual_removed <- read.csv(here::here("db_individual_lowest_removed-CORRECTED.csv"))
db_individual_removed <- db_individual_removed %>%
select(-ID)
individual_matrix <- as.matrix(db_individual_removed)set.seed(4)
km_spec3 <- k_means(num_clusters = 3)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 3 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 6.47 5.48 5.08 6.39 2.85
2 Cluster_2 5.13 13.4 13.4 4.49 3.27
3 Cluster_3 6.07 4.51 5.51 7.59 12.5
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 3)
grids_km3$totss[1] 7106704
grids_km3$withinss[1] 1444484 2254308 1430584
grids_km3$betweenss[1] 1977328
set.seed(4)
km_spec3 <- k_means(num_clusters = 4)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 4 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 8.51 6.75 5.27 5.96 3.30
2 Cluster_2 3.77 4.09 4.98 7.15 3.41
3 Cluster_3 4.70 13.6 13.9 4.47 3.29
4 Cluster_4 5.37 4.22 5.75 7.69 13.3
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 4)
grids_km3$totss[1] 7106704
grids_km3$withinss[1] 1186674.7 1251643.8 1452332.0 760011.4
grids_km3$betweenss[1] 2456042
set.seed(4)
km_spec3 <- k_means(num_clusters = 5)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 5 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 4.39 4.82 7.46 5.75 3.84
2 Cluster_2 3.91 4.23 4.45 7.25 3.47
3 Cluster_3 4.59 13.8 14.0 4.45 3.32
4 Cluster_4 5.34 4.35 5.26 7.76 14.2
5 Cluster_5 11.4 8.04 4.37 6.47 3.82
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 5)
grids_km3$totss[1] 7106704
grids_km3$withinss[1] 736378.0 871435.4 1106223.0 659987.2 1002434.4
grids_km3$betweenss[1] 2730246
set.seed(4)
km_spec3 <- k_means(num_clusters = 6)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 6 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 4.85 5.49 4.96 5.97 3.95
2 Cluster_2 3.87 4.16 4.33 7.28 3.45
3 Cluster_3 4.97 18.7 13.0 3.98 3.00
4 Cluster_4 4.34 8.36 13.8 5.11 3.90
5 Cluster_5 5.33 4.36 5.05 7.78 14.6
6 Cluster_6 12.0 7.59 4.65 6.53 3.86
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 6)
grids_km3$totss[1] 7106704
grids_km3$withinss[1] 825052.1 663096.3 580316.3 855767.0 693384.0 510255.8
grids_km3$betweenss[1] 2978833
set.seed(4)
km_spec3 <- k_means(num_clusters = 7)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 7 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 5.05 5.66 5.11 5.07 3.68
2 Cluster_2 3.88 4.20 4.30 7.04 3.36
3 Cluster_3 4.91 18.8 13.0 3.94 3.00
4 Cluster_4 4.37 8.49 13.9 4.83 3.71
5 Cluster_5 5.53 4.49 5.24 6.01 16.0
6 Cluster_6 12.5 7.69 4.72 5.84 3.79
7 Cluster_7 4.85 4.84 5.18 12.3 7.68
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 7)
grids_km3$totss[1] 7106704
grids_km3$withinss[1] 633270.6 555515.0 616836.7 645917.9 472100.9 571064.9 485252.6
grids_km3$betweenss[1] 3126746
centroids <- km_fitted3 |> extract_centroids()
centroids# A tibble: 7 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 5.05 5.66 5.11 5.07 3.68
2 Cluster_2 3.88 4.20 4.30 7.04 3.36
3 Cluster_3 4.91 18.8 13.0 3.94 3.00
4 Cluster_4 4.37 8.49 13.9 4.83 3.71
5 Cluster_5 5.53 4.49 5.24 6.01 16.0
6 Cluster_6 12.5 7.69 4.72 5.84 3.79
7 Cluster_7 4.85 4.84 5.18 12.3 7.68
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
database_lowest_removed <- database_new %>%
filter(score > 57)database3 <- database_lowest_removed %>%
mutate(cluster = extract_cluster_assignment(km_fitted3)$.cluster)database3 %>%
group_by(cluster) %>%
summarize(mean_score=mean(score),
sd_score=sd(score),
p25th=quantile(score,probs=0.25),
median=quantile(score,probs=0.5),
p75th=quantile(score,probs=0.75),
count=n(),
.groups = 'drop')# A tibble: 7 × 7
cluster mean_score sd_score p25th median p75th count
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 Cluster_1 63.2 3.67 60 63 66 5928
2 Cluster_2 63.9 3.98 61 63 67 4586
3 Cluster_3 69.4 5.42 66 70 73 3999
4 Cluster_4 66.0 4.91 62 66 70 7613
5 Cluster_5 67.4 5.10 63 67 71 4899
6 Cluster_6 64.7 4.46 61 64 68 5773
7 Cluster_7 65.4 4.41 62 65 68 3910
db_individual_removed <- read.csv(here::here("db_individual_lowest_removed-CORRECTED.csv"))
db_individual_removed <- db_individual_removed %>%
select(-ID)db_scaled <- data.frame(lapply(db_individual_removed,scale))individual_matrix <- as.matrix(db_scaled)set.seed(4)
km_spec3 <- k_means(num_clusters = 3)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 3 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 0.230 -0.400 -0.471 0.243 0.649
2 Cluster_2 -0.290 -0.461 -0.449 0.199 -0.375
3 Cluster_3 -0.0605 0.798 0.872 -0.426 -0.489
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 3)
grids_km3$totss[1] 440484
grids_km3$withinss[1] 84964.63 144453.23 121999.44
grids_km3$betweenss[1] 89066.7
set.seed(4)
km_spec3 <- k_means(num_clusters = 4)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 4 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 0.558 -0.217 -0.395 0.0921 -0.326
2 Cluster_2 -0.377 -0.477 -0.442 0.221 -0.363
3 Cluster_3 -0.249 1.06 1.15 -0.461 -0.462
4 Cluster_4 -0.169 -0.422 -0.302 0.175 1.30
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 4)
grids_km3$totss[1] 440484
grids_km3$withinss[1] 75773.09 83868.58 68995.63 98334.26
grids_km3$betweenss[1] 113512.4
set.seed(4)
km_spec3 <- k_means(num_clusters = 5)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 5 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 1.41 -0.0630 -0.424 0.256 -0.187
2 Cluster_2 -0.409 -0.483 -0.448 0.248 -0.357
3 Cluster_3 -0.302 1.14 1.20 -0.468 -0.457
4 Cluster_4 -0.217 -0.433 -0.303 0.196 1.41
5 Cluster_5 -0.302 -0.294 -0.208 -0.120 -0.331
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 5)
grids_km3$totss[1] 440484
grids_km3$withinss[1] 46251.37 61879.28 80191.68 56789.71 66088.32
grids_km3$betweenss[1] 129283.6
set.seed(4)
km_spec3 <- k_means(num_clusters = 6)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 6 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 1.58 -0.00761 -0.401 -0.0587 -0.245
2 Cluster_2 -0.406 -0.493 -0.462 0.258 -0.361
3 Cluster_3 -0.301 1.20 1.24 -0.520 -0.487
4 Cluster_4 -0.170 -0.405 -0.260 -0.233 1.42
5 Cluster_5 -0.269 -0.247 -0.162 -0.398 -0.409
6 Cluster_6 -0.235 -0.322 -0.265 1.58 0.456
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 6)
grids_km3$totss[1] 440484
grids_km3$withinss[1] 26449.07 55431.15 45414.34 48547.69 58705.05 67206.20
grids_km3$betweenss[1] 138730.5
set.seed(4)
km_spec3 <- k_means(num_clusters = 7)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 7 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 1.59 -0.0670 -0.495 -0.0385 -0.214
2 Cluster_2 -0.407 -0.521 -0.499 0.271 -0.349
3 Cluster_3 -0.282 0.859 1.05 -0.465 -0.315
4 Cluster_4 -0.173 -0.461 -0.333 -0.209 1.53
5 Cluster_5 -0.240 -0.332 -0.324 -0.336 -0.365
6 Cluster_6 -0.224 -0.374 -0.343 1.62 0.537
7 Cluster_7 -0.161 1.16 1.16 -0.409 -0.668
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 7)
grids_km3$totss[1] 440484
grids_km3$withinss[1] 37696.54 50293.45 36513.09 48488.26 51990.11 36633.54 30047.61
grids_km3$betweenss[1] 148821.4
set.seed(4)
km_spec3 <- k_means(num_clusters = 8)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 8 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 -0.210 -0.311 -0.245 -0.287 -0.301
2 Cluster_2 -0.111 -0.306 -0.293 -0.201 -0.260
3 Cluster_3 -0.278 0.928 1.08 -0.478 -0.302
4 Cluster_4 -0.165 -0.454 -0.336 -0.188 1.61
5 Cluster_5 -0.224 -0.359 -0.324 1.71 0.564
6 Cluster_6 -0.174 1.23 1.20 -0.433 -0.676
7 Cluster_7 -0.416 -0.520 -0.502 0.290 -0.355
8 Cluster_8 1.69 -0.0506 -0.484 -0.0154 -0.213
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 8)
grids_km3$totss[1] 440484
grids_km3$withinss[1] 49105.88 32733.17 33089.61 32935.30 27006.21 27630.43 37460.55 38537.18
grids_km3$betweenss[1] 161985.7
set.seed(4)
km_spec3 <- k_means(num_clusters = 9)
grids_recipe <- recipe(~., data = individual_matrix)
km_wflow3 <- workflow() |>
add_recipe(grids_recipe) |>
add_model(km_spec3)
km_fitted3 <- km_wflow3 |> fit(individual_matrix)
km_fitted3 |> extract_centroids()# A tibble: 9 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 -0.195 -0.287 -0.201 -0.300 -0.294
2 Cluster_2 -0.0671 -0.291 -0.284 -0.213 -0.245
3 Cluster_3 -0.277 0.956 1.10 -0.477 -0.301
4 Cluster_4 -0.164 -0.450 -0.330 -0.192 1.63
5 Cluster_5 -0.315 -0.405 -0.466 0.0971 -0.260
6 Cluster_6 -0.217 -0.363 -0.336 1.73 0.583
7 Cluster_7 -0.182 1.27 1.21 -0.453 -0.675
8 Cluster_8 -0.495 -0.684 -0.444 0.598 -0.537
9 Cluster_9 1.73 -0.0376 -0.457 -0.0274 -0.207
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
grids_km3 <- kmeans(individual_matrix, centers = 9)
grids_km3$totss[1] 440484
grids_km3$withinss[1] 27946.96 28548.22 30640.87 24746.53 22405.00 31390.59 35361.58 23582.96
[9] 46064.03
grids_km3$betweenss[1] 169797.3
# wcss <- data.frame(
# k = rep(7, 7),
# wcss = grids_km3$withinss
# )# wcss2 <- data.frame(
# k = rep(9, 9),
# wcss = grids_km3$withinss
# )
#
# wcss <- rbind(wcss, wcss2)# write.csv(wcss, here::here("wcss-CORRECTED.csv"), row.names = FALSE)wcss <- read.csv(here::here("wcss-CORRECTED.csv"))wcss_means <- wcss %>%
group_by(k) %>%
summarize(mean = mean(wcss))
wcss_means# A tibble: 7 × 2
k mean
<int> <dbl>
1 3 117139.
2 4 81743.
3 5 62240.
4 6 50292.
5 7 41666.
6 8 34812.
7 9 30076.
ggplot(aes(x = k, y = wcss), data=wcss) +
geom_point() +
geom_point(aes(x=k, y=mean, color="red"), data=wcss_means) +
labs(x = "K", y="Within Cluster Sums of Square") +
theme_minimal() +
theme(panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank(),
legend.position = "none")centroids <- km_fitted3 |> extract_centroids()
centroids# A tibble: 9 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 -0.195 -0.287 -0.201 -0.300 -0.294
2 Cluster_2 -0.0671 -0.291 -0.284 -0.213 -0.245
3 Cluster_3 -0.277 0.956 1.10 -0.477 -0.301
4 Cluster_4 -0.164 -0.450 -0.330 -0.192 1.63
5 Cluster_5 -0.315 -0.405 -0.466 0.0971 -0.260
6 Cluster_6 -0.217 -0.363 -0.336 1.73 0.583
7 Cluster_7 -0.182 1.27 1.21 -0.453 -0.675
8 Cluster_8 -0.495 -0.684 -0.444 0.598 -0.537
9 Cluster_9 1.73 -0.0376 -0.457 -0.0274 -0.207
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
centroids <- km_fitted3 |> extract_centroids()
centroids# A tibble: 9 × 13
.cluster bear_score bee_score meadow_score trout_score eagle_score
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Cluster_1 -0.195 -0.287 -0.201 -0.300 -0.294
2 Cluster_2 -0.0671 -0.291 -0.284 -0.213 -0.245
3 Cluster_3 -0.277 0.956 1.10 -0.477 -0.301
4 Cluster_4 -0.164 -0.450 -0.330 -0.192 1.63
5 Cluster_5 -0.315 -0.405 -0.466 0.0971 -0.260
6 Cluster_6 -0.217 -0.363 -0.336 1.73 0.583
7 Cluster_7 -0.182 1.27 1.21 -0.453 -0.675
8 Cluster_8 -0.495 -0.684 -0.444 0.598 -0.537
9 Cluster_9 1.73 -0.0376 -0.457 -0.0274 -0.207
# ℹ 7 more variables: rabbit_score <dbl>, dragonfly_score <dbl>,
# fox_score <dbl>, deer_score <dbl>, stream_score <dbl>, wolves_score <dbl>,
# dv_score <dbl>
database_lowest_removed <- database_new %>%
filter(score > 57)# database4 <- database_lowest_removed %>%
# mutate(`7cluster_seed4` = extract_cluster_assignment(km_fitted3)$.cluster)# database4 <- database4 %>%
# mutate(`7cluster_seed5` = extract_cluster_assignment(km_fitted3)$.cluster)# seed <- database4 %>%
# group_by(`7cluster_seed41`) %>%
# summarize(mean_score=mean(score),
# sd_score=sd(score),
# .groups = 'drop') %>%
# arrange(desc(mean_score))
# seed# write.csv(seed, here::here("7cluster_seed.csv"), row.names = FALSE)database_new %>%
summarize(mean_score=mean(score),
sd_score=sd(score),
min=quantile(score, probs=0),
p25th=quantile(score,probs=0.25),
median=quantile(score,probs=0.5),
p75th=quantile(score,probs=0.75),
max=quantile(score, probs=1)
) mean_score sd_score min p25th median p75th max
1 61.67342 8.346453 17 57 63 67 88
database_lowest_removed <- database_new %>%
filter(score > 57)# database3 <- database_lowest_removed %>%
# mutate(`7cluster` = extract_cluster_assignment(km_fitted3)$.cluster)# database3 <- database3 %>%
# mutate(`7cluster_s5` = extract_cluster_assignment(km_fitted3)$.cluster)# database3 %>%
# group_by(`6cluster`) %>%
# summarize(mean_score=mean(score),
# sd_score=sd(score),
# p25th=quantile(score,probs=0.25),
# median=quantile(score,probs=0.5),
# p75th=quantile(score,probs=0.75),
# count=n(),
# .groups = 'drop') %>%
# arrange(desc(mean_score))# write.csv(database3, here::here("normalized_clusters-CORRECTED.csv"), row.names = FALSE)cluster_data <- read.csv(here::here("normalized_clusters-CORRECTED.csv"))sum((cluster_data %>% select(X6cluster)) == (cluster_data %>% select(X6cluster_s5)))[1] 26815
sum((cluster_data %>% select(X6cluster)) == (cluster_data %>% select(X6cluster_s6)))[1] 18244
sum((cluster_data %>% select(X6cluster_s5)) == (cluster_data %>% select(X6cluster_s6)))[1] 16986
nrow(cluster_data)[1] 36708
cluster_data %>%
group_by(X7cluster) %>%
summarize(mean_score=mean(score),
sd_score=sd(score),
p25th=quantile(score,probs=0.25),
median=quantile(score,probs=0.5),
p75th=quantile(score,probs=0.75),
count=n(),
.groups = 'drop')# A tibble: 7 × 7
X7cluster mean_score sd_score p25th median p75th count
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 Cluster_1 65.3 4.62 62 65 68 5163
2 Cluster_2 64.1 4.04 61 64 67 6142
3 Cluster_3 68.6 5.28 65 69 72 6633
4 Cluster_4 66.6 5.13 63 66 70 5130
5 Cluster_5 63.3 3.74 60 63 66 6529
6 Cluster_6 66.3 4.73 63 66 70 3838
7 Cluster_7 64.6 4.71 61 64 68 3273
count_prop <- function(card_name, database){
df_pos <- data.frame(
row1col1 = as.numeric(nrow(database %>% filter(row1col1 == card_name))),
row1col2 = as.numeric(nrow(database %>% filter(row1col2 == card_name))),
row1col3 = as.numeric(nrow(database %>% filter(row1col3 == card_name))),
row1col4 = as.numeric(nrow(database %>% filter(row1col4 == card_name))),
row1col5 = as.numeric(nrow(database %>% filter(row1col5 == card_name))),
row2col1 = as.numeric(nrow(database %>% filter(row2col1 == card_name))),
row2col2 = as.numeric(nrow(database %>% filter(row2col2 == card_name))),
row2col3 = as.numeric(nrow(database %>% filter(row2col3 == card_name))),
row2col4 = as.numeric(nrow(database %>% filter(row2col4 == card_name))),
row2col5 = as.numeric(nrow(database %>% filter(row2col5 == card_name))),
row3col1 = as.numeric(nrow(database %>% filter(row3col1 == card_name))),
row3col2 = as.numeric(nrow(database %>% filter(row3col2 == card_name))),
row3col3 = as.numeric(nrow(database %>% filter(row3col3 == card_name))),
row3col4 = as.numeric(nrow(database %>% filter(row3col4 == card_name))),
row3col5 = as.numeric(nrow(database %>% filter(row3col5 == card_name))),
row4col1 = as.numeric(nrow(database %>% filter(row4col1 == card_name))),
row4col2 = as.numeric(nrow(database %>% filter(row4col2 == card_name))),
row4col3 = as.numeric(nrow(database %>% filter(row4col3 == card_name))),
row4col4 = as.numeric(nrow(database %>% filter(row4col4 == card_name))),
row4col5 = as.numeric(nrow(database %>% filter(row4col5 == card_name)))
)
df_pos_per <- apply(df_pos, 1, function(x) x/sum(x))
row <- c("row1", "row2", "row3", "row4")
col <- c("col1", "col2", "col3", "col4", "col5")
df_hm <- expand.grid(col = col, row = row)
df_hm <- df_hm %>%
mutate(proportion = df_pos_per[,1])
return(df_hm)
}# bear_pos <- data.frame(
# row1col1 = as.numeric(nrow(database_new %>% filter(row1col1 == "Bear"))),
# row1col2 = as.numeric(nrow(database_new %>% filter(row1col2 == "Bear"))),
# row1col3 = as.numeric(nrow(database_new %>% filter(row1col3 == "Bear"))),
# row1col4 = as.numeric(nrow(database_new %>% filter(row1col4 == "Bear"))),
# row1col5 = as.numeric(nrow(database_new %>% filter(row1col5 == "Bear"))),
# row2col1 = as.numeric(nrow(database_new %>% filter(row2col1 == "Bear"))),
# row2col2 = as.numeric(nrow(database_new %>% filter(row2col2 == "Bear"))),
# row2col3 = as.numeric(nrow(database_new %>% filter(row2col3 == "Bear"))),
# row2col4 = as.numeric(nrow(database_new %>% filter(row2col4 == "Bear"))),
# row2col5 = as.numeric(nrow(database_new %>% filter(row2col5 == "Bear"))),
# row3col1 = as.numeric(nrow(database_new %>% filter(row3col1 == "Bear"))),
# row3col2 = as.numeric(nrow(database_new %>% filter(row3col2 == "Bear"))),
# row3col3 = as.numeric(nrow(database_new %>% filter(row3col3 == "Bear"))),
# row3col4 = as.numeric(nrow(database_new %>% filter(row3col4 == "Bear"))),
# row3col5 = as.numeric(nrow(database_new %>% filter(row3col5 == "Bear"))),
# row4col1 = as.numeric(nrow(database_new %>% filter(row4col1 == "Bear"))),
# row4col2 = as.numeric(nrow(database_new %>% filter(row4col2 == "Bear"))),
# row4col3 = as.numeric(nrow(database_new %>% filter(row4col3 == "Bear"))),
# row4col4 = as.numeric(nrow(database_new %>% filter(row4col4 == "Bear"))),
# row4col5 = as.numeric(nrow(database_new %>% filter(row4col5 == "Bear")))
# )
#
# bear_pos_per <- apply(bear_pos, 1, function(x) x/sum(x))
#
# row <- c("row1", "row2", "row3", "row4")
# col <- c("col1", "col2", "col3", "col4", "col5")
# bear_hm <- expand.grid(col = col, row = row)
# bear_hm <- bear_hm %>%
# mutate(proportion = bear_pos_per[,1])make_heatmap <- function(df, title){
ggplot(aes(x=col, y=row, fill=proportion), data=df) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
labs(title=title)
}# make_heatmap(count_prop("Bear", database_new), "Bear")# make_heatmap(count_prop("Bee", database_new), "Bee")# make_heatmap(count_prop("Meadow", database_new), "Meadow")# make_heatmap(count_prop("Trout", database_new), "Trout")# make_heatmap(count_prop("Eagle", database_new), "Eagle")# make_heatmap(count_prop("Rabbit", database_new), "Rabbit")# make_heatmap(count_prop("Dragonfly", database_new), "Dragonfly")# make_heatmap(count_prop("Fox", database_new), "Fox")# make_heatmap(count_prop("Deer", database_new), "Deer")# make_heatmap(count_prop("Stream", database_new), "Stream")# make_heatmap(count_prop("Wolf", database_new), "Wolf")bind_6clusters <- function(cluster_num){
rbind(count_prop("Bear",
df <- cluster_data %>%
filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Bear"),
count_prop("Bee",
df <- cluster_data %>%
filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Bee"),
count_prop("Meadow",
df <- cluster_data %>%
filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Meadow"),
count_prop("Trout",
df <- cluster_data %>%
filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Trout"),
count_prop("Eagle",
df <- cluster_data %>%
filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Eagle"),
count_prop("Rabbit",
df <- cluster_data %>%
filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Rabbit"),
count_prop("Dragonfly",
df <- cluster_data %>%
filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Dragonfly"),
count_prop("Fox",
df <- cluster_data %>%
filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Fox"),
count_prop("Deer",
df <- cluster_data %>%
filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Deer"),
count_prop("Stream",
df <- cluster_data %>%
filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Stream"),
count_prop("Wolf",
df <- cluster_data %>%
filter(X6cluster == cluster_num)) %>% mutate(cluster = cluster_num, card = "Wolf")
)
}cluster_position <- rbind(bind_6clusters("Cluster_1"),
bind_6clusters("Cluster_2"),
bind_6clusters("Cluster_3"),
bind_6clusters("Cluster_4"),
bind_6clusters("Cluster_5"),
bind_6clusters("Cluster_6")
)ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position %>% filter(card == "Bear"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Bear")overall_position <- rbind(count_prop("Bear", database_lowest_removed) %>% mutate(card = "Bear"),
count_prop("Bee", database_lowest_removed) %>% mutate(card = "Bee"),
count_prop("Meadow", database_lowest_removed) %>% mutate(card = "Meadow"),
count_prop("Trout", database_lowest_removed) %>% mutate(card = "Trout"),
count_prop("Eagle", database_lowest_removed) %>% mutate(card = "Eagle"),
count_prop("Rabbit", database_lowest_removed) %>% mutate(card = "Rabbit"),
count_prop("Dragonfly", database_lowest_removed) %>% mutate(card = "Dragonfly"),
count_prop("Fox", database_lowest_removed) %>% mutate(card = "Fox"),
count_prop("Deer", database_lowest_removed) %>% mutate(card = "Deer"),
count_prop("Stream", database_lowest_removed) %>% mutate(card = "Stream"),
count_prop("Wolf", database_lowest_removed) %>% mutate(card = "Wolf")
)ggplot(aes(x=col, y=row, fill=proportion), data=(overall_position)) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(card)) +
labs(title="Bear positions across clusters")cluster_table <- function(Xcluster, cluster_num, cluster_data){
df <- cluster_data %>%
filter(
case_when(
Xcluster == 7 ~ X7cluster == cluster_num,
Xcluster == 6 ~ X6cluster == cluster_num,
Xcluster == 5 ~ X5cluster == cluster_num,
Xcluster == 4 ~ X4cluster == cluster_num,
Xcluster == 3 ~ X3cluster == cluster_num
)
) %>%
summarize(mean_score=mean(score),
sd_score=sd(score),
p25th=quantile(score,probs=0.25),
median=quantile(score,probs=0.5),
p75th=quantile(score,probs=0.75),
count=n(),
.groups = 'drop')
return(df)
}cluster_prop <- function(Xcluster, cluster_num, database){
df <- database %>%
filter(
case_when(
Xcluster == 7 ~ X7cluster == cluster_num,
Xcluster == 6 ~ X6cluster == cluster_num,
Xcluster == 5 ~ X5cluster == cluster_num,
Xcluster == 4 ~ X4cluster == cluster_num,
Xcluster == 3 ~ X3cluster == cluster_num,
Xcluster == 8 ~ X8cluster == cluster_num,
Xcluster == 65 ~ X6cluster_s5 == cluster_num,
Xcluster == 66 ~ X6cluster_s6 == cluster_num,
Xcluster == 85 ~ X8cluster_s5 == cluster_num,
Xcluster == 75 ~ X7cluster_s5 == cluster_num
)
)
bear_count = 0
bee_count = 0
meadow_count = 0
trout_count = 0
eagle_count = 0
rabbit_count = 0
dragonfly_count = 0
fox_count = 0
deer_count = 0
stream_count = 0
wolf_count = 0
for (j in 2:21){
for(i in 1:nrow(df)){
if(df[i,j] == "Bear"){
bear_count = bear_count + 1
}else if(df[i,j] == "Bee"){
bee_count = bee_count + 1
}else if(df[i,j] == "Meadow"){
meadow_count = meadow_count + 1
}else if(df[i,j] == "Trout"){
trout_count = trout_count + 1
}else if(df[i,j] == "Eagle"){
eagle_count = eagle_count + 1
}else if(df[i,j] == "Rabbit"){
rabbit_count = rabbit_count + 1
}else if(df[i,j] == "Dragonfly"){
dragonfly_count = dragonfly_count + 1
}else if(df[i,j] == "Fox"){
fox_count = fox_count + 1
}else if(df[i,j] == "Deer"){
deer_count = deer_count + 1
}else if(df[i,j] == "Stream"){
stream_count = stream_count + 1
}else if(df[i,j] == "Wolf"){
wolf_count = wolf_count + 1
}else{
print("bugged")
}
}
}
df_prop <- data.frame(
name = c("bear", "bee", "meadow", "trout", "eagle", "rabbit",
"dragonfly", "fox", "deer", "stream", "wolf"),
proportion = c(bear_count/(nrow(df)*20),
bee_count/(nrow(df)*20),
meadow_count/(nrow(df)*20),
trout_count/(nrow(df)*20),
eagle_count/(nrow(df)*20),
rabbit_count/(nrow(df)*20),
dragonfly_count/(nrow(df)*20),
fox_count/(nrow(df)*20),
deer_count/(nrow(df)*20),
stream_count/(nrow(df)*20),
wolf_count/(nrow(df)*20)),
true_prop = c(12/130,
8/130,
20/130,
10/130,
8/130,
8/130,
8/130,
12/130,
12/130,
20/130,
12/130)
)
return(df_prop)
}make_bars <- function(df, title){
ggplot(aes(x = reorder(name, -proportion), y = proportion, fill = reorder(name, -proportion)), data=df) +
geom_bar(stat = "identity") +
scale_fill_brewer(palette="PRGn", direction = -1) +
labs(x = "card", title=title) +
theme(legend.position = "none")
}exact_card_count <- function(Xcluster=NULL, cluster_num=NULL, database){
bear_exact <- rep(0, 13)
bee_exact <- rep(0, 9)
meadow_exact <- rep(0, 21)
trout_exact <- rep(0, 11)
eagle_exact <- rep(0, 9)
rabbit_exact <- rep(0, 9)
dragonfly_exact <- rep(0, 9)
fox_exact <- rep(0, 13)
deer_exact <- rep(0, 13)
stream_exact <- rep(0, 21)
wolf_exact <- rep(0, 13)
if(is.null(Xcluster) && is.null(cluster_num)){
df <- database %>%
filter(pool == "default")
}else{
df <- database %>%
filter(
case_when(
Xcluster == 7 ~ X7cluster == cluster_num,
Xcluster == 6 ~ X6cluster == cluster_num,
Xcluster == 5 ~ X5cluster == cluster_num,
Xcluster == 4 ~ X4cluster == cluster_num,
Xcluster == 3 ~ X3cluster == cluster_num
)
)
}
for(i in 1:nrow(df)){
bear_count <- 0
bee_count <- 0
meadow_count <- 0
trout_count <- 0
eagle_count <- 0
rabbit_count <- 0
dragonfly_count <- 0
fox_count <- 0
deer_count <- 0
stream_count <- 0
wolf_count <- 0
for(j in 2:21){
if(df[i,j] == "Bear"){
bear_count = bear_count + 1
}else if(df[i,j] == "Bee"){
bee_count = bee_count + 1
}else if(df[i,j] == "Meadow"){
meadow_count = meadow_count + 1
}else if(df[i,j] == "Trout"){
trout_count = trout_count + 1
}else if(df[i,j] == "Eagle"){
eagle_count = eagle_count + 1
}else if(df[i,j] == "Rabbit"){
rabbit_count = rabbit_count + 1
}else if(df[i,j] == "Dragonfly"){
dragonfly_count = dragonfly_count + 1
}else if(df[i,j] == "Fox"){
fox_count = fox_count + 1
}else if(df[i,j] == "Deer"){
deer_count = deer_count + 1
}else if(df[i,j] == "Stream"){
stream_count = stream_count + 1
}else if(df[i,j] == "Wolf"){
wolf_count = wolf_count + 1
}else{
print("bugged")
}
}
bear_exact[bear_count+1] = bear_exact[bear_count+1] + 1
bee_exact[bee_count+1] = bee_exact[bee_count+1] + 1
meadow_exact[meadow_count+1] = meadow_exact[meadow_count+1] + 1
trout_exact[trout_count+1] = trout_exact[trout_count+1] + 1
eagle_exact[eagle_count+1] = eagle_exact[eagle_count+1] + 1
rabbit_exact[rabbit_count+1] = rabbit_exact[rabbit_count+1] + 1
dragonfly_exact[dragonfly_count+1] = dragonfly_exact[dragonfly_count+1] + 1
fox_exact[fox_count+1] = fox_exact[fox_count+1] + 1
deer_exact[deer_count+1] = deer_exact[deer_count+1] + 1
stream_exact[stream_count+1] = stream_exact[stream_count+1] + 1
wolf_exact[wolf_count+1] = wolf_exact[wolf_count+1] + 1
}
bear_exact[8] = bear_exact[8]+bear_exact[9]+bear_exact[10]+
bear_exact[11]+bear_exact[12]+bear_exact[13]
bee_exact[8] = bee_exact[8]+bee_exact[9]
meadow_exact[8] = meadow_exact[8]+meadow_exact[9]+meadow_exact[10]+
meadow_exact[11]+meadow_exact[12]+meadow_exact[13]+meadow_exact[14]+
meadow_exact[15]+meadow_exact[16]+meadow_exact[17]+meadow_exact[18]+
meadow_exact[19]+meadow_exact[20]+meadow_exact[21]
trout_exact[8] = trout_exact[8]+trout_exact[9]+trout_exact[10]+
trout_exact[11]
eagle_exact[8] = eagle_exact[8]+eagle_exact[9]
rabbit_exact[8] = rabbit_exact[8]+rabbit_exact[9]
dragonfly_exact[8] = dragonfly_exact[8]+dragonfly_exact[9]
fox_exact[8] = fox_exact[8]+fox_exact[9]+fox_exact[10]+
fox_exact[11]+fox_exact[12]+fox_exact[13]
deer_exact[8] = deer_exact[8]+deer_exact[9]+deer_exact[10]+
deer_exact[11]+deer_exact[12]+deer_exact[13]
stream_exact[8] = stream_exact[8]+stream_exact[9]+stream_exact[10]+
stream_exact[11]+stream_exact[12]+stream_exact[13]+stream_exact[14]+
stream_exact[15]+stream_exact[16]+stream_exact[17]+stream_exact[18]+
stream_exact[19]+stream_exact[20]+stream_exact[21]
wolf_exact[8] = wolf_exact[8]+wolf_exact[9]+wolf_exact[10]+
wolf_exact[11]+wolf_exact[12]+wolf_exact[13]
bear_exact = bear_exact[1:8]
bee_exact = bee_exact[1:8]
meadow_exact = meadow_exact[1:8]
trout_exact = trout_exact[1:8]
eagle_exact = eagle_exact[1:8]
rabbit_exact = rabbit_exact[1:8]
dragonfly_exact = dragonfly_exact[1:8]
fox_exact = fox_exact[1:8]
deer_exact = deer_exact[1:8]
stream_exact = stream_exact[1:8]
wolf_exact = wolf_exact[1:8]
result <- data.frame(
card = c(rep("Bear", 8),
rep("Bee", 8),
rep("Meadow", 8),
rep("Trout", 8),
rep("Eagle", 8),
rep("Rabbit", 8),
rep("Dragonfly", 8),
rep("Fox", 8),
rep("Deer", 8),
rep("Stream", 8),
rep("Wolf", 8)
),
num_exact = c(seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7),
seq(0,7)
),
proportion = c(bear_exact/(nrow(df)),
bee_exact/(nrow(df)),
meadow_exact/(nrow(df)),
trout_exact/(nrow(df)),
eagle_exact/(nrow(df)),
rabbit_exact/(nrow(df)),
dragonfly_exact/(nrow(df)),
fox_exact/(nrow(df)),
deer_exact/(nrow(df)),
stream_exact/(nrow(df)),
wolf_exact/(nrow(df))
)
)
return(result)
}make_exact_bars <- function(df, title){
ggplot(aes(x = factor(num_exact),
y = proportion,
fill = factor(num_exact)),
data =
(df %>%
filter(proportion != 0))
) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
facet_wrap(vars(card),scales = "free_x") +
labs(x="Exact number of cards",
fill="Exact number of cards",
title=title)
}nrow(database_new %>% filter(pool == "default"))[1] 30000
default_pool_props <- exact_card_count(database=database_new)ggplot(aes(x = factor(num_exact),
y = proportion,
fill = factor(num_exact)),
data =
(default_pool_props %>%
filter(proportion != 0))
) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
facet_wrap(vars(card)) +
labs(x="Exact number of cards",
fill="Exact number of cards",
title="Overall proportions of the default pool")Summary statistics for cluster 7
# cluster_table(7, "Cluster_7", cluster_data)Proportion of each card type out of all the cards in the cluster
# make_bars(cluster_prop(7, "Cluster_7", cluster_data), "7Cluster: Cluster_7")Distributions of the exact number of each card type out of all grids in the cluster
# make_exact_bars(exact_card_count(7, "Cluster_7", cluster_data), title="7Cluster: Cluster_7")# cluster_table(7, "Cluster_6", cluster_data)# make_bars(cluster_prop(7, "Cluster_6", cluster_data), "7Cluster: Cluster_6")# make_exact_bars(exact_card_count(7, "Cluster_6", cluster_data), title="7Cluster: Cluster_6")# cluster_table(7, "Cluster_3", cluster_data)# make_bars(cluster_prop(7, "Cluster_3", cluster_data), "7Cluster: Cluster_3")# make_exact_bars(exact_card_count(7, "Cluster_3", cluster_data), title="7Cluster: Cluster_3")# cluster_table(7, "Cluster_4", cluster_data)# make_bars(cluster_prop(7, "Cluster_4", cluster_data), "7Cluster: Cluster_4")# make_exact_bars(exact_card_count(7, "Cluster_4", cluster_data), title="7Cluster: Cluster_4")# cluster_table(7, "Cluster_2", cluster_data)# make_bars(cluster_prop(7, "Cluster_2", cluster_data), "7Cluster: Cluster_2")# make_exact_bars(exact_card_count(7, "Cluster_2", cluster_data), title="7Cluster: Cluster_2")# cluster_table(7, "Cluster_1", cluster_data)# make_bars(cluster_prop(7, "Cluster_1", cluster_data), "7Cluster: Cluster_1")# make_exact_bars(exact_card_count(7, "Cluster_1", cluster_data), title="7Cluster: Cluster_1")# cluster_table(7, "Cluster_5", cluster_data)# make_bars(cluster_prop(7, "Cluster_5", cluster_data), "7Cluster: Cluster_5")# make_exact_bars(exact_card_count(7, "Cluster_5", cluster_data), title="7Cluster: Cluster_5")overall_position <- overall_position %>%
mutate(
card = factor(card, levels = c(
"Meadow", "Stream", "Bear",
"Deer", "Fox", "Wolf", "Trout", "Bee",
"Dragonfly", "Eagle", "Rabbit"
)))ggplot(aes(x=col, y=row, fill=proportion), data=(overall_position)) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(card)) +
labs(title="Positions across all 50,000")# all_cluster_prop7_s5 <- rbind(cluster_prop(75, "Cluster_1", cluster_data) %>% mutate(cluster = "Cluster_1"),
# cluster_prop(75, "Cluster_2", cluster_data) %>% mutate(cluster = "Cluster_2"),
# cluster_prop(75, "Cluster_3", cluster_data) %>% mutate(cluster = "Cluster_3"),
# cluster_prop(75, "Cluster_4", cluster_data) %>% mutate(cluster = "Cluster_4"),
# cluster_prop(75, "Cluster_5", cluster_data) %>% mutate(cluster = "Cluster_5"),
# cluster_prop(75, "Cluster_6", cluster_data) %>% mutate(cluster = "Cluster_6"),
# cluster_prop(75, "Cluster_7", cluster_data) %>% mutate(cluster = "Cluster_7")
# )# all_exact <- rbind(exact_card_count(6, "Cluster_1", cluster_data) %>% mutate(cluster = "Cluster_1"),
# exact_card_count(6, "Cluster_2", cluster_data) %>% mutate(cluster = "Cluster_2"),
# exact_card_count(6, "Cluster_3", cluster_data) %>% mutate(cluster = "Cluster_3"),
# exact_card_count(6, "Cluster_4", cluster_data) %>% mutate(cluster = "Cluster_4"),
# exact_card_count(6, "Cluster_5", cluster_data) %>% mutate(cluster = "Cluster_5"),
# exact_card_count(6, "Cluster_6", cluster_data) %>% mutate(cluster = "Cluster_6")
# )# write.csv(all_exact, here::here("all_exact-CORRECTED.csv"), row.names = FALSE)# write.csv(all_cluster_prop, here::here("cluster_prop6-CORRECTED.csv"), row.names = FALSE)cluster_position_renamed <- cluster_position %>%
mutate(
cluster = factor(cluster, levels = c(
"Cluster_1", "Cluster_2", "Cluster_3",
"Cluster_4", "Cluster_5", "Cluster_6"
))) %>%
mutate(cluster = fct_recode(cluster,
"Bear Trout Bee" = "Cluster_1",
"Stream Dragonfly" = "Cluster_2",
"Meadow Bee" = "Cluster_3",
"Eagle Rabbit" = "Cluster_4",
"Deer Fox" = "Cluster_5",
"Elevated Trout" = "Cluster_6"
))all_cluster_prop <- read.csv(here::here("cluster_prop6-CORRECTED.csv")) %>%
mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:6)))all_cluster_prop <- all_cluster_prop %>%
mutate(
name = case_when(
name == "bear" ~ "Bear",
name == "bee" ~ "Bee",
name == "meadow" ~ "Meadow",
name == "stream" ~ "Stream",
name == "dragonfly" ~ "Dragonfly",
name == "eagle" ~ "Eagle",
name == "rabbit" ~ "Rabbit",
name == "fox" ~ "Fox",
name == "deer" ~ "Deer",
name == "trout" ~ "Trout",
name == "wolf" ~ "Wolf",
)
)all_cluster_prop_renamed <- all_cluster_prop %>%
mutate(
cluster = factor(cluster, levels = c(
"Cluster_1", "Cluster_2", "Cluster_3",
"Cluster_4", "Cluster_5", "Cluster_6"
))) %>%
mutate(cluster = fct_recode(cluster,
"Bear Trout Bee" = "Cluster_1",
"Stream Dragonfly" = "Cluster_2",
"Meadow Bee" = "Cluster_3",
"Eagle Rabbit" = "Cluster_4",
"Deer Fox" = "Cluster_5",
"Elevated Trout" = "Cluster_6"
))all_exact <- read.csv(here::here("all_exact-CORRECTED.csv")) all_exact_renamed <- all_exact %>%
mutate(
cluster = factor(cluster, levels = c(
"Cluster_1", "Cluster_2", "Cluster_3",
"Cluster_4", "Cluster_5", "Cluster_6"
))) %>%
mutate(cluster = fct_recode(cluster,
"Bear Trout Bee" = "Cluster_1",
"Stream Dragonfly" = "Cluster_2",
"Meadow Bee" = "Cluster_3",
"Eagle Rabbit" = "Cluster_4",
"Deer Fox" = "Cluster_5",
"Elevated Trout" = "Cluster_6"
))cluster_data_renamed <- cluster_data %>%
mutate(
X6cluster = factor(X6cluster, levels = c(
"Cluster_1", "Cluster_2", "Cluster_3",
"Cluster_4", "Cluster_5", "Cluster_6"
))) %>%
mutate(X6cluster = fct_recode(X6cluster,
"Bear Trout Bee" = "Cluster_1",
"Stream Dragonfly" = "Cluster_2",
"Meadow Bee" = "Cluster_3",
"Eagle Rabbit" = "Cluster_4",
"Deer Fox" = "Cluster_5",
"Elevated Trout" = "Cluster_6"
))results_table <- cluster_data_renamed %>%
group_by(X6cluster) %>%
summarize(mean_score=mean(score),
sd_score=sd(score),
p25th=quantile(score,probs=0.25),
median=quantile(score,probs=0.5),
p75th=quantile(score,probs=0.75),
count=n(),
.groups = 'drop') %>%
arrange(desc(mean_score))
results_table# A tibble: 6 × 7
X6cluster mean_score sd_score p25th median p75th count
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 Meadow Bee 67.8 5.35 64 68 72 7315
2 Eagle Rabbit 66.6 5.18 62 66 70 5852
3 Elevated Trout 66.4 4.82 63 66 70 4265
4 Bear Trout Bee 65.4 4.73 62 65 69 5560
5 Stream Dragonfly 64.1 4.08 61 64 67 6448
6 Deer Fox 63.5 3.91 60 63 66 7268
# write.csv(results_table, here::here("results_table-CORRECTED.csv"), row.names = FALSE)# write.csv(all_cluster_prop7, here::here("cluster_prop7-CORRECTED.csv"), row.names = FALSE)all_cluster_prop7 <- read.csv(here::here("cluster_prop7-CORRECTED.csv")) %>%
mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:7)))# cluster_data_renamed %>%
# group_by(X7cluster) %>%
# summarize(mean_score=mean(score),
# sd_score=sd(score),
# p25th=quantile(score,probs=0.25),
# median=quantile(score,probs=0.5),
# p75th=quantile(score,probs=0.75),
# count=n(),
# .groups = 'drop') %>%
# arrange(desc(mean_score))ggplot(aes(x = reorder(name, -true_prop),
y = proportion,
fill = reorder(name, -true_prop)),
data=all_cluster_prop7) +
geom_bar(stat = "identity") +
geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
color = "red",
data=all_cluster_prop7,
show.legend = FALSE) +
scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
"#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major = element_line(color = "black"),
panel.grid.minor = element_line(color = "black"),
panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank()) +
facet_wrap(vars(cluster))# write.csv(all_cluster_prop7_s5, here::here("cluster_prop7_seed5-CORRECTED.csv"), row.names = FALSE)all_cluster_prop7_s5 <- read.csv(here::here("cluster_prop7_seed5-CORRECTED.csv")) %>%
mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:7)))ggplot(aes(x = reorder(name, -true_prop),
y = proportion,
fill = reorder(name, -true_prop)),
data=all_cluster_prop7_s5) +
geom_bar(stat = "identity") +
geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
color = "red",
data=all_cluster_prop7_s5,
show.legend = FALSE) +
scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
"#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major = element_line(color = "black"),
panel.grid.minor = element_line(color = "black"),
panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank()) +
facet_wrap(vars(cluster))# write.csv(all_cluster_prop6, here::here("cluster_prop6-CORRECTED.csv"), row.names = FALSE)all_cluster_prop6 <- read.csv(here::here("cluster_prop6-CORRECTED.csv")) %>%
mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:6)))ggplot(aes(x = reorder(name, -true_prop),
y = proportion,
fill = reorder(name, -true_prop)),
data=all_cluster_prop) +
geom_bar(stat = "identity") +
geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
color = "red",
data=all_cluster_prop,
show.legend = FALSE) +
scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
"#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major = element_line(color = "black"),
panel.grid.minor = element_line(color = "black"),
panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank()) +
facet_wrap(vars(cluster))ggplot(aes(x = reorder(name, -true_prop),
y = proportion,
fill = reorder(name, -true_prop)),
data=all_cluster_prop) +
geom_bar(stat = "identity") +
geom_tile(
aes(
x = reorder(name, -true_prop),
y = true_prop,
width = 1,
height = 0
),
color = "red",
fill = "red",
linetype = "solid",
linewidth = 0.7,
alpha = 0.7
) +
scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
"#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major = element_line(color = "black"),
panel.grid.minor = element_line(color = "black"),
panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank()) +
facet_wrap(vars(cluster))# write.csv(all_cluster_prop6_s5, here::here("cluster_prop6_seed5-CORRECTED.csv"), row.names = FALSE)all_cluster_prop6_s5 <- read.csv(here::here("cluster_prop6_seed5-CORRECTED.csv")) %>%
mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:6)))ggplot(aes(x = reorder(name, -true_prop),
y = proportion,
fill = reorder(name, -true_prop)),
data=all_cluster_prop6_s5) +
geom_bar(stat = "identity") +
geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
color = "red",
data=all_cluster_prop6_s5,
show.legend = FALSE) +
scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
"#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major = element_line(color = "black"),
panel.grid.minor = element_line(color = "black"),
panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank()) +
facet_wrap(vars(cluster))# write.csv(all_cluster_prop6_s6, here::here("cluster_prop6_seed6-CORRECTED.csv"), row.names = FALSE)all_cluster_prop6_s6 <- read.csv(here::here("cluster_prop6_seed6-CORRECTED.csv")) %>%
mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:6)))ggplot(aes(x = reorder(name, -true_prop),
y = proportion,
fill = reorder(name, -true_prop)),
data=all_cluster_prop6_s6) +
geom_bar(stat = "identity") +
geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
color = "red",
data=all_cluster_prop6_s6,
show.legend = FALSE) +
scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
"#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major = element_line(color = "black"),
panel.grid.minor = element_line(color = "black"),
panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank()) +
facet_wrap(vars(cluster))# write.csv(all_cluster_prop5, here::here("cluster_prop5-CORRECTED.csv"), row.names = FALSE)all_cluster_prop5 <- read.csv(here::here("cluster_prop5-CORRECTED.csv")) %>%
mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:5)))ggplot(aes(x = reorder(name, -true_prop),
y = proportion,
fill = reorder(name, -true_prop)),
data=all_cluster_prop5) +
geom_bar(stat = "identity") +
geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
color = "red",
data=all_cluster_prop5,
show.legend = FALSE) +
scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
"#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major = element_line(color = "black"),
panel.grid.minor = element_line(color = "black"),
panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank()) +
facet_wrap(vars(cluster))# write.csv(all_cluster_prop8, here::here("cluster_prop8-CORRECTED.csv"), row.names = FALSE)all_cluster_prop8 <- read.csv(here::here("cluster_prop8-CORRECTED.csv")) %>%
mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:8)))ggplot(aes(x = reorder(name, -true_prop),
y = proportion,
fill = reorder(name, -true_prop)),
data=all_cluster_prop8) +
geom_bar(stat = "identity") +
geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
color = "red",
data=all_cluster_prop8,
show.legend = FALSE) +
scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
"#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major = element_line(color = "black"),
panel.grid.minor = element_line(color = "black"),
panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank()) +
facet_wrap(vars(cluster))# write.csv(all_cluster_prop8_s5, here::here("cluster_prop8_seed5-CORRECTED.csv"), row.names = FALSE)all_cluster_prop8_s5 <- read.csv(here::here("cluster_prop8_seed5-CORRECTED.csv")) %>%
mutate(cluster = factor(cluster, levels = paste0("Cluster_", 1:8)))ggplot(aes(x = reorder(name, -true_prop),
y = proportion,
fill = reorder(name, -true_prop)),
data=all_cluster_prop8_s5) +
geom_bar(stat = "identity") +
geom_point(aes(x = reorder(name, -true_prop), y= true_prop),
color = "red",
data=all_cluster_prop8_s5,
show.legend = FALSE) +
scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
"#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
labs(x = "Card", y = "Proportion", fill="Card Type", title="Propotion of card types in each cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major = element_line(color = "black"),
panel.grid.minor = element_line(color = "black"),
panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank()) +
facet_wrap(vars(cluster))ggplot(aes(x = reorder(name, -true_prop),
y = proportion,
fill = reorder(name, -true_prop)),
data=all_cluster_prop_renamed) +
geom_bar(stat = "identity") +
geom_tile(
aes(
x = reorder(name, -true_prop),
y = true_prop,
width = 1,
height = 0
),
color = "red",
fill = "red",
linetype = "solid",
linewidth = 0.7,
alpha = 0.7
) +
scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
"#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
labs(x = "Card", y="Proportion", fill="Card type", title="Propotion of card types in each cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major = element_line(color = "black"),
panel.grid.minor = element_line(color = "black"),
panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank()) +
facet_wrap(vars(cluster))all_cluster_prop_renamed2 <- all_cluster_prop_renamed %>%
mutate(prop_diff = proportion-true_prop)ggplot(aes(x = reorder(name, -true_prop),
y = prop_diff,
fill = reorder(name, -true_prop)),
data=all_cluster_prop_renamed2) +
geom_bar(stat = "identity") +
scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
"#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
labs(x = "Card", y="Difference in Proportion", fill="Card type", title="Propotion of card types in each cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major = element_line(color = "black"),
panel.grid.minor = element_line(color = "black"),
panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank()) +
facet_wrap(vars(cluster))ggplot(aes(x = cluster,
y = proportion,
fill = cluster),
data=all_cluster_prop_renamed) +
geom_bar(stat = "identity") +
scale_fill_manual(values = c("#000000", "#377eb8", "#4daf4a", "#984ea3", "#a65628",
"#f781bf")) +
labs(x = "Card", y="Proportion", fill="Card type", title="Propotion of card types in each cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major = element_line(color = "black"),
panel.grid.minor = element_line(color = "black"),
panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank()) +
facet_wrap(vars(reorder(name, -true_prop)))ggplot(aes(x = cluster,
y = prop_diff,
fill = cluster),
data=all_cluster_prop_renamed2) +
geom_bar(stat = "identity") +
scale_fill_manual(values = c("#000000", "#377eb8", "#4daf4a", "#984ea3", "#a65628",
"#f781bf")) +
labs(x = "Card", y="Difference in Proportion", fill="Card type", title="Propotion of card types in each cluster") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
panel.grid.major = element_line(color = "black"),
panel.grid.minor = element_line(color = "black"),
panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank()) +
facet_wrap(vars(reorder(name, -true_prop)))ggplot(all_cluster_prop_renamed, aes(x = proportion, y = cluster, color = reorder(name, -true_prop))) +
geom_point(
size = 3,
alpha = 0.8,
position = position_jitter(height = 0.2, seed = 110)
) +
scale_color_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
"#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
labs(
x = "Proportion",
y = "Cluster",
color = "Card Type",
title = "Card Proportions by Cluster"
) +
theme_minimal() +
theme(
panel.grid.minor = element_blank(),
legend.position = "right",
panel.grid.major = element_line(color = "black"),
)all_cluster_prop_renamed <- all_cluster_prop_renamed %>%
mutate(
name = factor(name, levels = c(
"meadow", "stream", "bear",
"deer", "fox", "wolf", "trout", "bee",
"dragonfly", "eagle", "rabbit"
)))ggplot(all_cluster_prop_renamed, aes(x = proportion, y = name, color = cluster)) +
geom_point(
size = 3,
position = position_jitter(height = 0.2, seed = 99)
) +
scale_y_discrete(limits = rev) +
scale_color_manual(values = c("#000000", "#377eb8", "#4daf4a", "#984ea3", "#a65628",
"#f781bf")) +
labs(
x = "Proportion",
y = "Card Type",
color = "Cluster",
title = "Card Proportions by Cluster"
) +
theme_minimal() +
theme(
panel.grid.minor = element_blank(),
legend.position = "right",
panel.grid.major = element_line(color = "black")
)ggplot(aes(x = cluster,
y = proportion,
fill = factor(num_exact, levels = rev(0:7))),
data = all_exact
) +
geom_bar(position = "fill", stat = "identity") +
scale_fill_viridis_d() +
facet_wrap(vars(card)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(x="Clusters",
fill="Exact number of cards",
title="Exact number of cards in each cluster")ggplot(all_exact, aes(x = num_exact, y = proportion, color = cluster)) +
geom_line(size = 0.5) +
facet_wrap(~ card) +
scale_x_continuous(breaks = 0:7) +
labs(x = "Exact Number of Cards",
y = "Proportion",
color = "Cluster",
title = "Exact number of cards in each cluster") +
theme_minimal() +
theme(legend.position = "bottom",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.text = element_text(size = 10, face = "bold"),
panel.spacing = unit(1, "lines"))ggplot(all_exact_renamed, aes(x = num_exact, y = proportion, color = cluster)) +
geom_point() +
geom_line(size = 0.5) +
facet_wrap(~ card) +
scale_x_continuous(breaks = 0:7) +
scale_color_manual(values = c("#000000", "#377eb8", "#4daf4a", "#984ea3", "#a65628",
"#f781bf")) +
labs(x = "Exact Number of Cards",
y = "Proportion",
color = "Cluster",
title = "Exact number of cards in each cluster") +
theme_minimal() +
theme(legend.position = "bottom",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.text = element_text(size = 10, face = "bold"),
panel.spacing = unit(1, "lines"))db_individual_removed <- read.csv(here::here("db_individual_lowest_removed-CORRECTED.csv"))cluster_merge <- merge(x = cluster_data, y = db_individual_removed, by = "ID")cluster_merge_renamed <- cluster_merge %>%
mutate(
cluster = factor(X6cluster, levels = c(
"Cluster_1", "Cluster_2", "Cluster_3",
"Cluster_4", "Cluster_5", "Cluster_6"
))) %>%
mutate(X6cluster = fct_recode(X6cluster,
"Bear Trout Bee" = "Cluster_1",
"Stream Dragonfly" = "Cluster_2",
"Meadow Bee" = "Cluster_3",
"Eagle Rabbit" = "Cluster_4",
"Deer Fox" = "Cluster_5",
"Elevated Trout" = "Cluster_6"
))ggplot(cluster_merge_renamed |>
mutate(cluster = factor(X6cluster)),
aes(x = score, color = cluster, linetype = cluster)) +
# geom_density(size = 1.2) +
stat_density(size = 1.1, geom="line",position="identity") +
labs(x = "Score", y = "Density", color = "Cluster", linetype = "Cluster", title="Score distribution by cluster") +
scale_color_manual(values = c("#000000", "#377eb8", "#4daf4a", "#984ea3", "#a65628",
"#f781bf")) +
theme_minimal()results_table# A tibble: 6 × 7
X6cluster mean_score sd_score p25th median p75th count
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 Meadow Bee 67.8 5.35 64 68 72 7315
2 Eagle Rabbit 66.6 5.18 62 66 70 5852
3 Elevated Trout 66.4 4.82 63 66 70 4265
4 Bear Trout Bee 65.4 4.73 62 65 69 5560
5 Stream Dragonfly 64.1 4.08 61 64 67 6448
6 Deer Fox 63.5 3.91 60 63 66 7268
cluster_pool <- cluster_merge_renamed %>%
group_by(`X6cluster`, pool) %>%
summarize(count = n()) %>%
mutate(prop = count / sum(count))
cluster_pool# A tibble: 24 × 4
# Groups: X6cluster [6]
X6cluster pool count prop
<fct> <chr> <int> <dbl>
1 Bear Trout Bee bee_meadow 541 0.0973
2 Bear Trout Bee default 3551 0.639
3 Bear Trout Bee dragonfly_stream 264 0.0475
4 Bear Trout Bee low_eagle_rabbit 1204 0.217
5 Stream Dragonfly bee_meadow 186 0.0288
6 Stream Dragonfly default 2718 0.422
7 Stream Dragonfly dragonfly_stream 2647 0.411
8 Stream Dragonfly low_eagle_rabbit 897 0.139
9 Meadow Bee bee_meadow 3132 0.428
10 Meadow Bee default 2834 0.387
# ℹ 14 more rows
cluster_pool_renamed <- cluster_pool %>%
mutate(
pool = factor(pool, levels = c(
"default", "bee_meadow", "dragonfly_stream",
"low_eagle_rabbit"
))) %>%
mutate(pool = fct_recode(pool,
"Default Pool" = "default",
"Meadow Bee Pool" = "bee_meadow",
"Stream Dragonfly Pool" = "dragonfly_stream",
"Low Eagle Rabbit Pool" = "low_eagle_rabbit"
))ggplot(aes(x = pool,
y = prop,
fill = pool),
data=cluster_pool_renamed) +
geom_bar(stat = "identity") +
labs(x = "Pool Sampled", y="Proportion", fill="Pool Sampled", title="Pool sampled in each cluster") +
theme_minimal() +
theme(panel.grid.major = element_line(color = "black"),
panel.grid.minor = element_line(color = "black"),
panel.grid.major.x = element_blank(), # Remove vertical major grid lines
panel.grid.minor.x = element_blank(),
axis.text.x = element_blank(), # Removes text
axis.ticks.x = element_blank()) +
facet_wrap(vars(X6cluster))cluster_div <- cluster_merge_renamed %>%
group_by(`X6cluster`, dv_score) %>%
summarize(count = n()) %>%
mutate(prop = count / sum(count)) %>%
arrange(desc(dv_score))
cluster_div# A tibble: 26 × 4
# Groups: X6cluster [6]
X6cluster dv_score count prop
<fct> <int> <int> <dbl>
1 Bear Trout Bee 12 4570 0.822
2 Stream Dragonfly 12 4711 0.731
3 Meadow Bee 12 4727 0.646
4 Eagle Rabbit 12 5339 0.912
5 Deer Fox 12 6691 0.921
6 Elevated Trout 12 3387 0.794
7 Bear Trout Bee 7 839 0.151
8 Stream Dragonfly 7 1412 0.219
9 Meadow Bee 7 1946 0.266
10 Eagle Rabbit 7 453 0.0774
# ℹ 16 more rows
ggplot(aes(x = dv_score, y = prop, color = X6cluster, linetype = X6cluster), data=cluster_div) +
geom_line(size = 1.1) +
labs(x = "Score", y = "Density", color = "Cluster", linetype = "Cluster", title="Diversity Score distribution by cluster") +
scale_color_manual(values = c("#000000", "#377eb8", "#4daf4a", "#984ea3", "#a65628",
"#f781bf")) +
scale_x_continuous(
breaks = c(-5, 0, 3, 7, 12)) +
theme_minimal()cluster_merge_renamed %>%
filter(X6cluster == "Meadow Bee") %>%
group_by(dv_score) %>%
summarize(count = n()) %>%
arrange(desc(dv_score))# A tibble: 5 × 2
dv_score count
<int> <int>
1 12 4727
2 7 1946
3 3 562
4 0 78
5 -5 2
cluster_merge_renamed %>%
filter(X6cluster == "Stream Dragonfly") %>%
group_by(dv_score) %>%
summarize(count = n()) %>%
arrange(desc(dv_score))# A tibble: 5 × 2
dv_score count
<int> <int>
1 12 4711
2 7 1412
3 3 290
4 0 33
5 -5 2
cluster_merge_renamed %>%
filter(X6cluster == "Bear Trout Bee") %>%
group_by(dv_score) %>%
summarize(count = n()) %>%
arrange(desc(dv_score))# A tibble: 4 × 2
dv_score count
<int> <int>
1 12 4570
2 7 839
3 3 141
4 0 10
cluster_merge_renamed %>%
filter(X6cluster == "Eagle Rabbit") %>%
group_by(dv_score) %>%
summarize(count = n()) %>%
arrange(desc(dv_score))# A tibble: 4 × 2
dv_score count
<int> <int>
1 12 5339
2 7 453
3 3 55
4 0 5
cluster_merge_renamed %>%
filter(X6cluster == "Deer Fox") %>%
group_by(dv_score) %>%
summarize(count = n()) %>%
arrange(desc(dv_score))# A tibble: 4 × 2
dv_score count
<int> <int>
1 12 6691
2 7 544
3 3 32
4 0 1
cluster_merge_renamed %>%
filter(X6cluster == "Elevated Trout") %>%
group_by(dv_score) %>%
summarize(count = n()) %>%
arrange(desc(dv_score))# A tibble: 4 × 2
dv_score count
<int> <int>
1 12 3387
2 7 754
3 3 121
4 0 3
cluster_share <- cluster_merge_renamed %>%
group_by(`X6cluster`) %>%
summarize(meadow = mean(meadow_score),
stream = mean(stream_score),
bear = mean(bear_score),
deer = mean(deer_score),
fox = mean(fox_score),
wolf = mean(wolves_score),
trout = mean(trout_score),
bee = mean(bee_score),
dragonfly = mean(dragonfly_score),
eagle = mean(eagle_score),
rabbit = mean(rabbit_score),
diversity = mean(dv_score),
total = mean(score)
) %>%
arrange(desc(total))
cluster_share# A tibble: 6 × 14
X6cluster meadow stream bear deer fox wolf trout bee dragonfly eagle
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Meadow Bee 14.5 2.25 4.61 6.01 4.56 1.48 4.05 14.5 2.22 2.93
2 Eagle Rabbit 6.21 2.32 5.20 6.42 4.66 1.47 5.21 5.23 2.44 13.6
3 Elevated Tr… 6.19 1.83 4.91 6.81 5.13 1.84 12.6 5.71 1.42 8.22
4 Bear Trout … 5.44 2.25 13.0 6.14 5.13 1.64 5.92 7.53 2.23 4.29
5 Stream Drag… 5.10 5.10 4.14 6.01 4.39 1.28 7.21 4.73 11.3 3.64
6 Deer Fox 6.76 2.50 4.75 10.9 7.26 1.86 4.54 6.15 2.85 3.37
# ℹ 3 more variables: rabbit <dbl>, diversity <dbl>, total <dbl>
cluster_position_renamed <- cluster_position_renamed %>%
mutate(
card = factor(card, levels = c(
"Meadow", "Stream", "Bear",
"Deer", "Fox", "Wolf", "Trout", "Bee",
"Dragonfly", "Eagle", "Rabbit"
)))ggplot(aes(x=col, y=row, fill=proportion), data=(overall_position)) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837", limits = c(0, NA)) +
facet_wrap(vars(card)) +
labs(title="Positions across all 50,000", x = "Row", y = "Column", fill = "Proportion") +
theme(
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.ticks.length = unit(0, "pt"),
panel.grid = element_blank()
)ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>%
filter(cluster == "Bear Trout Bee"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837", limits = c(0, NA)) +
facet_wrap(vars(card)) +
labs(title="Bear Trout Bee cluster", x = "Row", y = "Column", fill = "Proportion") +
theme(
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.ticks.length = unit(0, "pt"),
panel.grid = element_blank()
)ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>%
filter(cluster == "Bear Trout Bee",
card %in% c("Meadow", "Stream", "Dragonfly", "Eagle")))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837", limits = c(0, NA)) +
facet_wrap(vars(card)) +
labs(title="Bear Trout Bee cluster", x = "Row", y = "Column", fill = "Proportion") +
theme(
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.ticks.length = unit(0, "pt"),
panel.grid = element_blank()
)ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(cluster == "Stream Dragonfly"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837", limits = c(0, NA)) +
facet_wrap(vars(card)) +
labs(title="Stream Dragonfly cluster")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>%
filter(cluster == "Stream Dragonfly",
card %in% c("Meadow", "Stream", "Bear", "Bee")))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837", limits = c(0, NA)) +
facet_wrap(vars(card)) +
labs(title="Stream Dragonfly cluster", x = "Row", y = "Column", fill = "Proportion") +
theme(
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.ticks.length = unit(0, "pt"),
panel.grid = element_blank()
)ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(cluster == "Meadow Bee"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(card)) +
labs(title="Meadow Bee cluster")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>%
filter(cluster == "Meadow Bee",
card %in% c("Meadow", "Stream", "Trout", "Bee")))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837", limits = c(0, NA)) +
facet_wrap(vars(card)) +
labs(title="Meadow Bee cluster", x = "Row", y = "Column", fill = "Proportion") +
theme(
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.ticks.length = unit(0, "pt"),
panel.grid = element_blank()
)ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(cluster == "Eagle Rabbit"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(card)) +
labs(title="Eagle Rabbit cluster")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>%
filter(cluster == "Eagle Rabbit",
card %in% c("Meadow", "Eagle")))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837", limit = c(0,NA)) +
facet_wrap(vars(card)) +
labs(title="Eagle Rabbit cluster", x = "Row", y = "Column", fill = "Proportion") +
theme(
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.ticks.length = unit(0, "pt"),
panel.grid = element_blank()
) +
coord_fixed(ratio = 0.8)ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(cluster == "Deer Fox"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(card)) +
labs(title="Deer Fox cluster")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>%
filter(cluster == "Deer Fox"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(card)) +
labs(title="Deer Fox cluster")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(cluster == "Elevated Trout"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(card)) +
labs(title="Elevated Trout cluster")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Bear"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Bear positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Bee"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Bee positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Meadow"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Meadow positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Trout"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Trout positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Eagle"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Eagle positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Rabbit"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Rabbit positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Dragonfly"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Dragonfly positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Fox"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Fox positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Deer"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Deer positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Stream"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Stream positions across clusters")ggplot(aes(x=col, y=row, fill=proportion), data=(cluster_position_renamed %>% filter(card == "Wolf"))) +
geom_tile() +
theme_minimal() +
scale_fill_gradient(low="#F0F0F0", high="#006837") +
facet_wrap(vars(cluster)) +
labs(title="Wolf positions across clusters")db_pos = read_csv(here::here("database-CORRECTED-a.csv"))neighbors = db_pos |>
filter(score > 57) |>
select(!pool) |>
pivot_longer(!c(ID, score), names_to = "position", values_to = "card") |>
mutate(row = substr(position, start = 4, stop = 4),
col = substr(position, start = 8, stop = 8),
row = as.numeric(row),
col = as.numeric(col))# neighboring card on right
neighbors = neighbors |>
left_join(neighbors |>
mutate(col = col - 1) |>
rename(right_neighbor = card) |>
select(ID, row, col, right_neighbor),
join_by(ID, row, col))
# neighboring card on left
neighbors = neighbors |>
left_join(neighbors |>
mutate(col = col + 1) |>
rename(left_neighbor = card) |>
select(ID, row, col, left_neighbor),
join_by(ID, row, col))
# neighboring card up
neighbors = neighbors |>
left_join(neighbors |>
mutate(row = row - 1) |>
rename(up_neighbor = card) |>
select(ID, row, col, up_neighbor),
join_by(ID, row, col))
# neighboring card down
neighbors = neighbors |>
left_join(neighbors |>
mutate(row = row + 1) |>
rename(down_neighbor = card) |>
select(ID, row, col, down_neighbor),
join_by(ID, row, col))neighbors |> head(10)# A tibble: 10 × 10
ID score position card row col right_neighbor left_neighbor
<dbl> <dbl> <chr> <chr> <dbl> <dbl> <chr> <chr>
1 1 68 row1col1 Dragonfly 1 1 Stream <NA>
2 1 68 row1col2 Stream 1 2 Deer Dragonfly
3 1 68 row1col3 Deer 1 3 Rabbit Stream
4 1 68 row1col4 Rabbit 1 4 Fox Deer
5 1 68 row1col5 Fox 1 5 <NA> Rabbit
6 1 68 row2col1 Bear 2 1 Trout <NA>
7 1 68 row2col2 Trout 2 2 Bear Bear
8 1 68 row2col3 Bear 2 3 Deer Trout
9 1 68 row2col4 Deer 2 4 Rabbit Bear
10 1 68 row2col5 Rabbit 2 5 <NA> Deer
# ℹ 2 more variables: up_neighbor <chr>, down_neighbor <chr>
neighbors_long = neighbors |>
pivot_longer(!c(ID, score, position, row, col, card),
names_to = "neighbor",
values_to = "neighbor_card")neighbors_long |> head(10)# A tibble: 10 × 8
ID score position card row col neighbor neighbor_card
<dbl> <dbl> <chr> <chr> <dbl> <dbl> <chr> <chr>
1 1 68 row1col1 Dragonfly 1 1 right_neighbor Stream
2 1 68 row1col1 Dragonfly 1 1 left_neighbor <NA>
3 1 68 row1col1 Dragonfly 1 1 up_neighbor Bear
4 1 68 row1col1 Dragonfly 1 1 down_neighbor <NA>
5 1 68 row1col2 Stream 1 2 right_neighbor Deer
6 1 68 row1col2 Stream 1 2 left_neighbor Dragonfly
7 1 68 row1col2 Stream 1 2 up_neighbor Trout
8 1 68 row1col2 Stream 1 2 down_neighbor <NA>
9 1 68 row1col3 Deer 1 3 right_neighbor Rabbit
10 1 68 row1col3 Deer 1 3 left_neighbor Stream
neighbors_sum = neighbors_long |>
filter(!is.na(neighbor_card)) |>
group_by(card) |>
count(neighbor_card) |>
mutate(proportion = n / sum(n))ggplot(neighbors_sum,
aes(x = card,
y = neighbor_card,
fill = proportion)) +
geom_tile() +
scale_fill_distiller(palette = "Greens", direction = 1)neighbors_sum <- neighbors_sum %>%
mutate(
card = factor(card, levels = c(
"Meadow", "Stream", "Bear",
"Deer", "Fox", "Wolf", "Trout", "Bee",
"Dragonfly", "Eagle", "Rabbit"
))) %>%
mutate(
neighbor_card = factor(neighbor_card, levels = c(
"Meadow", "Stream", "Bear",
"Deer", "Fox", "Wolf", "Trout", "Bee",
"Dragonfly", "Eagle", "Rabbit"
)))ggplot(neighbors_sum,
aes(x = card,
fill = neighbor_card,
y = proportion)) +
geom_bar(position = "fill", stat = "identity") +
scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
"#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62"))+
theme_minimal()ggplot(neighbors_sum,
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
facet_wrap(vars(card))db_cluster = read_csv(here::here("normalized_clusters-CORRECTED.csv"))neighbors_cluster = db_cluster |>
pivot_longer(!c(ID, pool, score, contains("cluster")),
names_to = "position", values_to = "card") |>
mutate(row = substr(position, start = 4, stop = 4),
col = substr(position, start = 8, stop = 8),
row = as.numeric(row),
col = as.numeric(col))# neighboring card on right
neighbors_cluster = neighbors_cluster |>
left_join(neighbors_cluster |>
mutate(col = col - 1) |>
rename(right_neighbor = card) |>
select(ID, row, col, right_neighbor),
join_by(ID, row, col))
# neighboring card on left
neighbors_cluster = neighbors_cluster |>
left_join(neighbors_cluster |>
mutate(col = col + 1) |>
rename(left_neighbor = card) |>
select(ID, row, col, left_neighbor),
join_by(ID, row, col))
# neighboring card up
neighbors_cluster = neighbors_cluster |>
left_join(neighbors_cluster |>
mutate(row = row - 1) |>
rename(up_neighbor = card) |>
select(ID, row, col, up_neighbor),
join_by(ID, row, col))
# neighboring card down
neighbors_cluster = neighbors_cluster |>
left_join(neighbors_cluster |>
mutate(row = row + 1) |>
rename(down_neighbor = card) |>
select(ID, row, col, down_neighbor),
join_by(ID, row, col))neighbors_cluster |> head(10)# A tibble: 10 × 22
ID score pool `7cluster` `3cluster` `4cluster` `5cluster` `6cluster`
<dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
2 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
3 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
4 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
5 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
6 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
7 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
8 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
9 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
10 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
# ℹ 14 more variables: `8cluster` <chr>, `9cluster` <chr>, `6cluster_s5` <chr>,
# `6cluster_s6` <chr>, `8cluster_s5` <chr>, `7cluster_s5` <chr>,
# position <chr>, card <chr>, row <dbl>, col <dbl>, right_neighbor <chr>,
# left_neighbor <chr>, up_neighbor <chr>, down_neighbor <chr>
neighbors_cluster_long = neighbors_cluster |>
pivot_longer(!c(ID, pool, contains("cluster"), score, position, row, col, card),
names_to = "neighbor",
values_to = "neighbor_card")neighbors_cluster_long |> head(10)# A tibble: 10 × 20
ID score pool `7cluster` `3cluster` `4cluster` `5cluster` `6cluster`
<dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
2 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
3 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
4 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
5 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
6 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
7 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
8 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
9 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
10 1 68 default Cluster_1 Cluster_1 Cluster_1 Cluster_1 Cluster_1
# ℹ 12 more variables: `8cluster` <chr>, `9cluster` <chr>, `6cluster_s5` <chr>,
# `6cluster_s6` <chr>, `8cluster_s5` <chr>, `7cluster_s5` <chr>,
# position <chr>, card <chr>, row <dbl>, col <dbl>, neighbor <chr>,
# neighbor_card <chr>
neighbors_sum2 = neighbors_cluster_long |>
filter(!is.na(neighbor_card)) |>
group_by(card, `6cluster`) |>
count(neighbor_card) |>
mutate(proportion = n / sum(n))neighbors_sum2_renamed <- neighbors_sum2 %>%
mutate(
`6cluster` = factor(`6cluster`, levels = c(
"Cluster_1", "Cluster_2", "Cluster_3",
"Cluster_4", "Cluster_5", "Cluster_6"
))) %>%
mutate(`6cluster` = fct_recode(`6cluster`,
"Bear Trout Bee" = "Cluster_1",
"Stream Dragonfly" = "Cluster_2",
"Meadow Bee" = "Cluster_3",
"Eagle Rabbit" = "Cluster_4",
"Deer Fox" = "Cluster_5",
"Elevated Trout" = "Cluster_6"
)) %>%
mutate(
neighbor_card = factor(neighbor_card, levels = c(
"Meadow", "Stream", "Bear",
"Deer", "Fox", "Wolf", "Trout", "Bee",
"Dragonfly", "Eagle", "Rabbit"
))) %>%
mutate(
card = factor(card, levels = c(
"Meadow", "Stream", "Bear",
"Deer", "Fox", "Wolf", "Trout", "Bee",
"Dragonfly", "Eagle", "Rabbit"
)))ggplot(neighbors_sum2,
aes(x = `6cluster`,
fill = neighbor_card,
y = proportion)) +
geom_bar(position = "fill", stat = "identity") +
scale_fill_viridis_d() +
facet_wrap(vars(card)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))ggplot(neighbors_sum2_renamed,
aes(x = `6cluster`,
fill = neighbor_card,
y = proportion)) +
geom_bar(position = "fill", stat = "identity") +
scale_fill_manual(values = c("#4daf4a", "#377eb8", "#000000", "#a65628", "#ff7f00",
"#999999", "#f781bf", "#ffff33", "#66c2a5", "#984ea3", "#fc8d62")) +
facet_wrap(vars(card)) +
theme(axis.text.x = element_text(angle = 72, hjust = 1, size = 8)) +
labs(title = "Propotion of neighbor cards by cluster for each card type")ggplot(neighbors_sum2,
aes(x = card,
y = neighbor_card,
fill = proportion)) +
geom_tile() +
scale_fill_distiller(palette = "Greens", direction = 1) +
facet_wrap(vars(`6cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))ggplot(neighbors_sum2 %>% filter(card == "Bear"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`6cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Bear neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Bee"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`6cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Bee neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Meadow"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`6cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Meadow neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Trout"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`6cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Trout neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Eagle"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`6cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Eagle neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Rabbit"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`6cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Rabbit neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Dragonfly"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`6cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Dragonfly neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Fox"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`6cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Fox neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Deer"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`6cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Deer neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Stream"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`6cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Stream neighbors in each cluster")ggplot(neighbors_sum2 %>% filter(card == "Wolf"),
aes(x = neighbor_card,
y = proportion,
fill = neighbor_card)) +
geom_bar(stat = "identity") +
scale_fill_viridis_d() +
theme_minimal() +
facet_wrap(vars(`6cluster`)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
labs(title="Wolf neighbors in each cluster")db_individual_removed <- read.csv(here::here("db_individual_lowest_removed-CORRECTED.csv"))cluster_merge <- merge(x = cluster_data, y = db_individual_removed, by = "ID")ggplot(aes(x = score), data = cluster_merge) +
geom_histogram(binwidth = 5, fill = "green3") +
theme_minimal() +
facet_wrap(vars(X6cluster)) +
labs(title="Score distribution by cluster")ggplot(cluster_merge, aes(x = score, color = factor(X7cluster))) +
geom_density(size = 1.2) +
labs(x = "Score", y = "Density", color = "Cluster", title="Score distribution by cluster") +
theme_minimal()low_score = database_new %>% anti_join(cluster_merge, by = "ID")ggplot(cluster_merge, aes(x = score, color = factor(X6cluster))) +
geom_density(size = 1.2, key_glyph = "path") +
geom_density(aes(x=score, color = "Unclustered"), size = 1.2, data=low_score, key_glyph = "path") +
geom_density(aes(x=score, color = "All grids"), size = 1.2, data=database_new, key_glyph = "path") +
labs(x = "Score", y = "Density", color = "Cluster", title="Score distribution by cluster") +
theme_minimal()ggplot(database_new, aes(x = score, color = pool)) +
geom_density(size = 1.2, key_glyph = "path") +
labs(x = "Score", y = "Density", color = "Pool Sampled", title="Score distribution by pool") +
theme_minimal()database_start <- read.csv(here::here("database-CORRECTED-s.csv"))database_start_renamed <- database_start %>%
mutate(
pool = factor(pool, levels = c(
"default", "bee_meadow", "dragonfly_stream",
"low_eagle_rabbit"
))) %>%
mutate(pool = fct_recode(pool,
"Default Pool" = "default",
"Meadow Bee Pool" = "bee_meadow",
"Stream Dragonfly Pool" = "dragonfly_stream",
"Low Eagle Rabbit Pool" = "low_eagle_rabbit"
))database_new_renamed <- database_new %>%
mutate(
pool = factor(pool, levels = c(
"default", "bee_meadow", "dragonfly_stream",
"low_eagle_rabbit"
))) %>%
mutate(pool = fct_recode(pool,
"Default Pool" = "default",
"Meadow Bee Pool" = "bee_meadow",
"Stream Dragonfly Pool" = "dragonfly_stream",
"Low Eagle Rabbit Pool" = "low_eagle_rabbit"
))ggplot(database_start_renamed, aes(x = start_score, color = pool)) +
geom_density(aes(linetype = factor(pool)), size = 1.2, key_glyph = "path", show.legend = FALSE) +
stat_density(data=database_new_renamed, aes(x = score, color = pool), size = 1.1,
geom="line",position="identity") +
scale_linetype_manual(values = c("dashed", "dotted", "dotdash", "longdash", "twodash")) +
labs(x = "Score", y = "Density", color = "Pool Sampled", title="Start Score distribution by pool") +
theme_minimal()# write.csv(cluster_merge, here::here("cluster_merge.csv"), row.names = FALSE)# write.csv(neighbors_cluster_long, here::here("neighbor-cluster.csv"), row.names = FALSE)two_player <- list(c(37,4,2,2), c(45,3,3,3))
four_player <- list(c(37,5,1,2), c(45,2,4,3), c(60,0,0,0), c(47,4,3,4))
five_player <- list(c(37,3,0,2), c(45,2,4,3), c(60,3,0,0), c(47,2,0,4), c(38,1,4,4))mp_score(two_player)[1] 37 45
[1] 45 50
[1] 53 62
[1] 65 69
mp_score(four_player)[1] 37 45 60 47
[1] 45 45 60 52
[1] 49 57 60 60
[1] 61 60 72 63
mp_score(five_player)[1] 37 45 60 47 38
[1] 45 45 68 47 38
[1] 45 57 68 47 50
[1] 52 60 80 42 50
z <- score_grid(sample_grid)
z[1] 29 0 0 6
z2 <- score_grid(sample_grid2)
z2[1] 36 0 0 7
z3 <- score_grid(big_grid1)
z3[1] 43 3 1 0
set.seed(48)
test_grid <- generate_grid(cards)
test_grid [,1] [,2] [,3] [,4] [,5]
[1,] "Dragonfly" "Dragonfly" "Stream" "Meadow" "Fox"
[2,] "Bear" "Trout" "Meadow" "Fox" "Eagle"
[3,] "Trout" "Rabbit" "Stream" "Bee" "Stream"
[4,] "Wolf" "Rabbit" "Deer" "Meadow" "Bee"
x <- find_cardinals(0,3,test_grid)
x[[1]]
[1] 1 3
[[2]]
[1] 0 4
[[3]]
[1] 0 2
c <- sample(x, 1)
c[[1]]
[1] 1 3
"el"[1] "el"
c[[1]][1][1] 1
score_grid(test_grid)[1] 25 1 1 2
solo_score(score_grid(test_grid))[1] 39
set.seed(48)
baseline_scores <- baseline_sim(cards)mean(baseline_scores)[1] 28.3123
sd(baseline_scores)[1] 9.268601
var(baseline_scores)[1] 85.90696
max(baseline_scores)[1] 58
min(baseline_scores)[1] 2
summary(baseline_scores) Min. 1st Qu. Median Mean 3rd Qu. Max.
2.00 22.00 28.00 28.31 35.00 58.00
baseline_data <- data.frame(baseline_scores)
ggplot(aes(x = baseline_scores), data = baseline_data) +
geom_histogram(binwidth = 5, fill = "steelblue")set.seed(4)
slide_grid <- matrix(c("Stream", "Dragonfly", "Wolf", "Meadow", "Meadow", "Stream", "Trout", "Bear", "Bee", "Meadow",
"Stream", "Stream", "Deer", "Eagle", "Wolf", "Stream", "Dragonfly", "Fox", "Deer",
"Rabbit"),4,5,byrow=TRUE)
run1 <- rw_mcmc(slide_grid, 1000, "annealing dynamic", 0.9, 200, record_board = TRUE)
completed_slide <- matrix(run1[1:20], 4,5,byrow=TRUE)
slide_grid [,1] [,2] [,3] [,4] [,5]
[1,] "Stream" "Dragonfly" "Wolf" "Meadow" "Meadow"
[2,] "Stream" "Trout" "Bear" "Bee" "Meadow"
[3,] "Stream" "Stream" "Deer" "Eagle" "Wolf"
[4,] "Stream" "Dragonfly" "Fox" "Deer" "Rabbit"
solo_score(score_grid(slide_grid))[1] 65
completed_slide [,1] [,2] [,3] [,4] [,5]
[1,] "Stream" "Dragonfly" "Wolf" "Meadow" "Meadow"
[2,] "Stream" "Trout" "Bear" "Bee" "Meadow"
[3,] "Stream" "Stream" "Eagle" "Rabbit" "Deer"
[4,] "Dragonfly" "Stream" "Fox" "Deer" "Wolf"
solo_score(score_grid(completed_slide))[1] 67
slide_grid_op <- matrix(c("Stream", "Dragonfly", "Wolf", "Meadow", "Meadow", "Stream", "Trout", "Bear", "Bee", "Meadow",
"Stream", "Stream", "Eagle", "Rabbit", "Deer", "Stream", "Dragonfly", "Fox", "Deer",
"Wolf"),4,5,byrow=TRUE)
slide_grid_op [,1] [,2] [,3] [,4] [,5]
[1,] "Stream" "Dragonfly" "Wolf" "Meadow" "Meadow"
[2,] "Stream" "Trout" "Bear" "Bee" "Meadow"
[3,] "Stream" "Stream" "Eagle" "Rabbit" "Deer"
[4,] "Stream" "Dragonfly" "Fox" "Deer" "Wolf"
solo_score(score_grid(slide_grid_op))[1] 67
first_row <- score_grid(matrix(c(t(slide_grid)),nrow=4,ncol=5,byrow=T), individual=TRUE)
slide_score <- data.frame(
bear_score = c(first_row[1]),
bee_score = c(first_row[2]),
meadow_score = c(first_row[3]),
trout_score = c(first_row[4]),
eagle_score = c(first_row[5]),
rabbit_score = c(first_row[6]),
dragonfly_score = c(first_row[7]),
fox_score = c(first_row[8]),
deer_score = c(first_row[9]),
stream_score = c(first_row[10]),
wolves_score = c(first_row[11]),
dv_score = c(first_row[12])
)
slide_score bear_score bee_score meadow_score trout_score eagle_score rabbit_score
1 4 6 6 6 2 1
dragonfly_score fox_score deer_score stream_score wolves_score dv_score
1 10 3 8 5 2 12
# stream dragonfly trout
(choose(20, 7)*choose(8, 3)*choose(10, 3)*choose(92, 7))/choose(130, 20)[1] 2.726866e-05
(choose(20, 5)*choose(8, 2)*choose(10, 2)*choose(92, 11))/choose(130, 20)[1] 0.006274223
# stream dragonfly
(choose(20, 9)*choose(8, 4)*choose(102, 7))/choose(130, 20)[1] 1.297323e-06
(choose(20, 7)*choose(8, 3)*choose(102, 10))/choose(130, 20)[1] 0.0005525201
# fox deer/wolf deer
(choose(12, 4)*choose(12, 3)*choose(106, 13))/choose(130, 20)[1] 0.01035475
(choose(12, 3)*choose(12, 2)*choose(106, 15))/choose(130, 20)[1] 0.05625095
# bee meadow
(choose(20, 9)*choose(8, 4)*choose(102, 7))/choose(130, 20)[1] 1.297323e-06
(choose(20, 5)*choose(8, 2)*choose(102, 13))/choose(130, 20)[1] 0.02426065
# bear bee trout
(choose(12, 4)*choose(8, 3)*choose(10, 4)*choose(100, 9))/choose(130, 20)[1] 6.61645e-05
(choose(12, 3)*choose(8, 1)*choose(10, 2)*choose(100, 14))/choose(130, 20)[1] 0.02091068
# eagle rabbit
(choose(8, 3)*choose(8, 3)*choose(10, 3)*choose(104, 11))/choose(130, 20)[1] 0.0005015318
(choose(8, 2)*choose(8, 2)*choose(10, 1)*choose(104, 15))/choose(130, 20)[1] 0.02234951
xvy <- cluster_merge %>% filter(X7cluster == "Cluster_4")# fps2 <- read.csv(here::here("final-parameters-seed2.csv"))tuned_params <- read.csv(here::here("tuned-parameters.csv"))multivariate_hypergeometric <- function(desired_counts, deck_counts, total_draw, total_deck = NULL) {
# Input validation
if (length(desired_counts) != length(deck_counts)) {
stop("Length of desired_counts must match deck_counts")
}
if (any(desired_counts < 0) || any(deck_counts < 0)) {
stop("Counts cannot be negative")
}
if (total_draw <= 0) {
stop("Total draw must be positive")
}
# Calculate total deck size and other cards
if (is.null(total_deck)) {
total_deck <- sum(deck_counts)
warning("Assuming total deck size is sum of provided deck_counts (", total_deck,
"). Specify total_deck if different.")
}
other_cards <- total_deck - sum(deck_counts)
if (other_cards < 0) {
stop("Sum of deck_counts exceeds total_deck")
}
# Check if draw is possible
if (total_draw > total_deck) {
stop("Cannot draw more cards than exist in deck")
}
if (sum(desired_counts) > total_draw) {
return(0) # Impossible to get all desired counts
}
# Initialize probability
total_prob <- 0
# Recursive helper function
generate_combinations <- function(current, remaining_draw, remaining_types, prob_so_far) {
if (length(current) == length(desired_counts)) {
# Base case: all types assigned
remaining_cards <- remaining_draw
if (remaining_cards >= 0) {
# Calculate final probability for this combination
final_prob <- prob_so_far *
choose(other_cards, remaining_cards) /
choose(total_deck, total_draw)
return(final_prob)
} else {
return(0)
}
}
# Recursive case: assign counts for current type
type_index <- length(current) + 1
min_count <- desired_counts[type_index]
max_count <- min(deck_counts[type_index], remaining_draw)
prob_sum <- 0
for (count in min_count:max_count) {
new_current <- c(current, count)
new_remaining <- remaining_draw - count
new_prob <- prob_so_far * choose(deck_counts[type_index], count)
prob_sum <- prob_sum +
generate_combinations(new_current, new_remaining, remaining_types, new_prob)
}
return(prob_sum)
}
# Start recursive calculation
total_prob <- generate_combinations(
current = numeric(0),
remaining_draw = total_draw,
remaining_types = length(desired_counts),
prob_so_far = 1
)
return(total_prob)
}prob <- multivariate_hypergeometric(
desired_counts = c(2, 5), # Minimums for bees and meadows
deck_counts = c(8, 20), # Counts in deck for bees and meadows
total_draw = 20, # Drawing 20 cards
total_deck = 130 # Total cards in deck
)
prob[1] 0.04757679
# bear bee trout minimum
prob <- multivariate_hypergeometric(
desired_counts = c(3, 2, 2),
deck_counts = c(12, 8, 10),
total_draw = 20,
total_deck = 130
)
prob[1] 0.03272014
# bear bee trout maximum
prob <- multivariate_hypergeometric(
desired_counts = c(4, 3, 3),
deck_counts = c(12, 8, 10),
total_draw = 20,
total_deck = 130
)
prob[1] 0.0005701113
# deer fox wolf min
prob <- multivariate_hypergeometric(
desired_counts = c(3, 2, 1),
deck_counts = c(12, 12, 12),
total_draw = 20,
total_deck = 130
)
prob[1] 0.1203596
# deer fox wolf max
prob <- multivariate_hypergeometric(
desired_counts = c(4, 4, 2),
deck_counts = c(12, 12, 12),
total_draw = 20,
total_deck = 130
)
prob[1] 0.001886164
# bee meadow min
prob <- multivariate_hypergeometric(
desired_counts = c(2, 5), # Minimums for bees and meadows
deck_counts = c(8, 20), # Counts in deck for bees and meadows
total_draw = 20, # Drawing 20 cards
total_deck = 130 # Total cards in deck
)
prob[1] 0.04757679
# bee meadow max
prob <- multivariate_hypergeometric(
desired_counts = c(3, 8), # Minimums for bees and meadows
deck_counts = c(8, 20), # Counts in deck for bees and meadows
total_draw = 20, # Drawing 20 cards
total_deck = 130 # Total cards in deck
)
prob[1] 0.0001239406
# eagle rabbit trout min
prob <- multivariate_hypergeometric(
desired_counts = c(2, 2, 1),
deck_counts = c(8, 8, 10),
total_draw = 20,
total_deck = 130
)
prob[1] 0.0902812
# eagle rabbit trout max
prob <- multivariate_hypergeometric(
desired_counts = c(3, 3, 2),
deck_counts = c(8, 8, 10),
total_draw = 20,
total_deck = 130
)
prob[1] 0.002838628
# stream dragonfly trout min
prob <- multivariate_hypergeometric(
desired_counts = c(5, 2, 1),
deck_counts = c(20, 8, 10),
total_draw = 20,
total_deck = 130
)
prob[1] 0.03519305
# stream dragonfly trout max
prob <- multivariate_hypergeometric(
desired_counts = c(7, 3, 2),
deck_counts = c(20, 8, 10),
total_draw = 20,
total_deck = 130
)
prob[1] 0.0001824627
# trout min
prob <- multivariate_hypergeometric(
desired_counts = c(3),
deck_counts = c(10),
total_draw = 20,
total_deck = 130
)
prob[1] 0.1831558
# trout max
prob <- multivariate_hypergeometric(
desired_counts = c(4),
deck_counts = c(10),
total_draw = 20,
total_deck = 130
)
prob[1] 0.04698214
# stream dragonfly min
prob <- multivariate_hypergeometric(
desired_counts = c(5, 2),
deck_counts = c(20, 8),
total_draw = 20,
total_deck = 130
)
prob[1] 0.04757679
# stream dragonfly max
prob <- multivariate_hypergeometric(
desired_counts = c(7, 3),
deck_counts = c(20, 8),
total_draw = 20,
total_deck = 130
)
prob[1] 0.0007566569
1-phyper(3, 12, 118, 20)[1] 0.08923247